diff --git a/python/ray/experimental/tf_utils.py b/python/ray/experimental/tf_utils.py
index c528d94d6..6677161a4 100644
--- a/python/ray/experimental/tf_utils.py
+++ b/python/ray/experimental/tf_utils.py
@@ -4,7 +4,7 @@ import numpy as np
 from ray.rllib.utils import force_list
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 def unflatten(vector, shapes):
@@ -79,24 +79,29 @@ class TensorFlowVariables:
                 variable_names.append(tf_obj.node_def.name)
         self.variables = OrderedDict()
         variable_list = [
-            v for v in tf.global_variables()
+            v for v in tf1.global_variables()
             if v.op.node_def.name in variable_names
         ]
         if input_variables is not None:
             variable_list += input_variables
-        for v in variable_list:
-            self.variables[v.op.node_def.name] = v
 
-        self.placeholders = {}
-        self.assignment_nodes = {}
+        if not tf1.executing_eagerly():
+            for v in variable_list:
+                self.variables[v.op.node_def.name] = v
 
-        # Create new placeholders to put in custom weights.
-        for k, var in self.variables.items():
-            self.placeholders[k] = tf.placeholder(
-                var.value().dtype,
-                var.get_shape().as_list(),
-                name="Placeholder_" + k)
-            self.assignment_nodes[k] = var.assign(self.placeholders[k])
+            self.placeholders = {}
+            self.assignment_nodes = {}
+
+            # Create new placeholders to put in custom weights.
+            for k, var in self.variables.items():
+                self.placeholders[k] = tf1.placeholder(
+                    var.value().dtype,
+                    var.get_shape().as_list(),
+                    name="Placeholder_" + k)
+                self.assignment_nodes[k] = var.assign(self.placeholders[k])
+        else:
+            for v in variable_list:
+                self.variables[v.name] = v
 
     def set_session(self, sess):
         """Sets the current session used by the class.
@@ -117,10 +122,12 @@ class TensorFlowVariables:
 
     def _check_sess(self):
         """Checks if the session is set, and if not throw an error message."""
-        assert self.sess is not None, ("The session is not set. Set the "
-                                       "session either by passing it into the "
-                                       "TensorFlowVariables constructor or by "
-                                       "calling set_session(sess).")
+        # No session check is performed when executing eagerly.
+        if not tf1.executing_eagerly():
+            assert self.sess is not None, (
+                "The session is not set. Set the session either by "
+                "passing it into the TensorFlowVariables constructor or by "
+                "calling set_session(sess).")
 
     def get_flat(self):
         """Gets the weights and returns them as a flat array.
@@ -129,6 +136,11 @@ class TensorFlowVariables:
             1D Array containing the flattened weights.
         """
         self._check_sess()
+        # Eager mode.
+        if not self.sess:
+            return np.concatenate(
+                [v.numpy().flatten() for v in self.variables.values()])
+        # Graph mode.
         return np.concatenate([
             v.eval(session=self.sess).flatten()
             for v in self.variables.values()
@@ -147,12 +159,16 @@ class TensorFlowVariables:
         self._check_sess()
         shapes = [v.get_shape().as_list() for v in self.variables.values()]
         arrays = unflatten(new_weights, shapes)
-        placeholders = [
-            self.placeholders[k] for k, v in self.variables.items()
-        ]
-        self.sess.run(
-            list(self.assignment_nodes.values()),
-            feed_dict=dict(zip(placeholders, arrays)))
+        if not self.sess:
+            for v, a in zip(self.variables.values(), arrays):
+                v.assign(a)
+        else:
+            placeholders = [
+                self.placeholders[k] for k, v in self.variables.items()
+            ]
+            self.sess.run(
+                list(self.assignment_nodes.values()),
+                feed_dict=dict(zip(placeholders, arrays)))
 
     def get_weights(self):
         """Returns a dictionary containing the weights of the network.
@@ -161,6 +177,10 @@ class TensorFlowVariables:
             Dictionary mapping variable names to their weights.
         """
         self._check_sess()
+        # Eager mode: return values (not live variables), like graph mode.
+        if not self.sess:
+            return {k: v.numpy() for k, v in self.variables.items()}
+        # Graph mode.
         return self.sess.run(self.variables)
 
     def set_weights(self, new_weights):
diff --git a/rllib/BUILD b/rllib/BUILD
index 7610a1ac0..67db2c8a6 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -344,6 +344,7 @@ py_test(
     args = ["--yaml-dir=tuned_examples/sac", "--torch"]
 )
 
+
 # TD3
 py_test(
     name = "run_regression_tests_pendulum_td3_tf",
@@ -1013,6 +1014,13 @@ py_test(
     srcs = ["models/tests/test_distributions.py"]
 )
 
+py_test(
+    name = "test_attention_nets",
+    tags = ["models"],
+    size = "small",
+    srcs = ["models/tests/test_attention_nets.py"]
+)
+
 # --------------------------------------------------------------------
 # Optimizers and Memories
 # rllib/execution/
diff --git a/rllib/agents/a3c/a3c_tf_policy.py b/rllib/agents/a3c/a3c_tf_policy.py
index 8c2d9146a..dde894cd9 100644
--- a/rllib/agents/a3c/a3c_tf_policy.py
+++ b/rllib/agents/a3c/a3c_tf_policy.py
@@ -9,7 +9,7 @@ from ray.rllib.policy.tf_policy import LearningRateSchedule
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class A3CLoss:
diff --git a/rllib/agents/ars/ars_tf_policy.py b/rllib/agents/ars/ars_tf_policy.py
index e04118681..6c2f38022 100644
--- a/rllib/agents/ars/ars_tf_policy.py
+++ b/rllib/agents/ars/ars_tf_policy.py
@@ -13,7 +13,7 @@ from ray.rllib.utils.filter import get_filter
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.spaces.space_utils import unbatch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class ARSTFPolicy:
@@ -29,8 +29,8 @@ class ARSTFPolicy:
         self.single_threaded = config.get("single_threaded", False)
         self.sess = make_session(single_threaded=self.single_threaded)
 
-        self.inputs = tf.placeholder(tf.float32,
-                                     [None] + list(self.preprocessor.shape))
+        self.inputs = tf1.placeholder(tf.float32,
+                                      [None] + list(self.preprocessor.shape))
 
         # Policy network.
         dist_class, dist_dim = ModelCatalog.get_action_dist(
@@ -52,7 +52,7 @@ class ARSTFPolicy:
         self.num_params = sum(
             np.prod(variable.shape.as_list())
             for _, variable in self.variables.variables.items())
-        self.sess.run(tf.global_variables_initializer())
+        self.sess.run(tf1.global_variables_initializer())
 
     def compute_actions(self,
                         observation,
diff --git a/rllib/agents/ddpg/ddpg_tf_model.py b/rllib/agents/ddpg/ddpg_tf_model.py
index dcaa17aab..84f8a0878 100644
--- a/rllib/agents/ddpg/ddpg_tf_model.py
+++ b/rllib/agents/ddpg/ddpg_tf_model.py
@@ -3,7 +3,7 @@ import numpy as np
 from ray.rllib.models.tf.tf_modelv2 import TFModelV2
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class DDPGTFModel(TFModelV2):
diff --git a/rllib/agents/ddpg/ddpg_tf_policy.py b/rllib/agents/ddpg/ddpg_tf_policy.py
index 4d9730254..027ccba26 100644
--- a/rllib/agents/ddpg/ddpg_tf_policy.py
+++ b/rllib/agents/ddpg/ddpg_tf_policy.py
@@ -22,7 +22,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.tf_ops import huber_loss, minimize_and_clip, \
     make_tf_callable
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -126,18 +126,18 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
     target_model_out_tp1, _ = policy.target_model(input_dict_next, [], None)
 
     # Policy network evaluation.
-    with tf.variable_scope(POLICY_SCOPE, reuse=True):
-        # prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
+    with tf1.variable_scope(POLICY_SCOPE, reuse=True):
+        # prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS))
         policy_t = model.get_policy_output(model_out_t)
         # policy_batchnorm_update_ops = list(
-        #    set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
+        #   set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
 
-    with tf.variable_scope(POLICY_TARGET_SCOPE):
+    with tf1.variable_scope(POLICY_TARGET_SCOPE):
         policy_tp1 = \
             policy.target_model.get_policy_output(target_model_out_tp1)
 
     # Action outputs.
-    with tf.variable_scope(ACTION_SCOPE, reuse=True):
+    with tf1.variable_scope(ACTION_SCOPE, reuse=True):
         if policy.config["smooth_target_policy"]:
             target_noise_clip = policy.config["target_noise_clip"]
             clipped_normal_sample = tf.clip_by_value(
@@ -154,29 +154,29 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
             policy_tp1_smoothed = policy_tp1
 
     # Q-net(s) evaluation.
-    # prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
-    with tf.variable_scope(Q_SCOPE):
+    # prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS))
+    with tf1.variable_scope(Q_SCOPE):
         # Q-values for given actions & observations in given current
         q_t = model.get_q_values(model_out_t, train_batch[SampleBatch.ACTIONS])
 
-    with tf.variable_scope(Q_SCOPE, reuse=True):
+    with tf1.variable_scope(Q_SCOPE, reuse=True):
         # Q-values for current policy (no noise) in given current state
         q_t_det_policy = model.get_q_values(model_out_t, policy_t)
 
     if twin_q:
-        with tf.variable_scope(TWIN_Q_SCOPE):
+        with tf1.variable_scope(TWIN_Q_SCOPE):
             twin_q_t = model.get_twin_q_values(
                 model_out_t, train_batch[SampleBatch.ACTIONS])
     # q_batchnorm_update_ops = list(
-    #     set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
+    #     set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
 
     # Target q-net(s) evaluation.
-    with tf.variable_scope(Q_TARGET_SCOPE):
+    with tf1.variable_scope(Q_TARGET_SCOPE):
         q_tp1 = policy.target_model.get_q_values(target_model_out_tp1,
                                                  policy_tp1_smoothed)
 
     if twin_q:
-        with tf.variable_scope(TWIN_Q_TARGET_SCOPE):
+        with tf1.variable_scope(TWIN_Q_TARGET_SCOPE):
             twin_q_tp1 = policy.target_model.get_twin_q_values(
                 target_model_out_tp1, policy_tp1_smoothed)
 
@@ -220,10 +220,10 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
     if l2_reg is not None:
         for var in policy.model.policy_variables():
             if "bias" not in var.name:
-                actor_loss += (l2_reg * tf.nn.l2_loss(var))
+                actor_loss += (l2_reg * tf1.nn.l2_loss(var))
         for var in policy.model.q_variables():
             if "bias" not in var.name:
-                critic_loss += (l2_reg * tf.nn.l2_loss(var))
+                critic_loss += (l2_reg * tf1.nn.l2_loss(var))
 
     # Model self-supervised losses.
     if policy.config["use_state_preprocessor"]:
@@ -259,9 +259,9 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
 
 def make_ddpg_optimizers(policy, config):
     # Create separate optimizers for actor & critic losses.
-    policy._actor_optimizer = tf.train.AdamOptimizer(
+    policy._actor_optimizer = tf1.train.AdamOptimizer(
         learning_rate=config["actor_lr"])
-    policy._critic_optimizer = tf.train.AdamOptimizer(
+    policy._critic_optimizer = tf1.train.AdamOptimizer(
         learning_rate=config["critic_lr"])
     return None
 
@@ -286,7 +286,7 @@ def build_apply_op(policy, optimizer, grads_and_vars):
     # For policy gradient, update policy net one time v.s.
     # update critic net `policy_delay` time(s).
     should_apply_actor_opt = tf.equal(
-        tf.mod(policy.global_step, policy.config["policy_delay"]), 0)
+        tf.math.floormod(policy.global_step, policy.config["policy_delay"]), 0)
 
     def make_apply_op():
         return policy._actor_optimizer.apply_gradients(
@@ -299,7 +299,7 @@ def build_apply_op(policy, optimizer, grads_and_vars):
     critic_op = policy._critic_optimizer.apply_gradients(
         policy._critic_grads_and_vars)
     # Increment global step & apply ops.
-    with tf.control_dependencies([tf.assign_add(policy.global_step, 1)]):
+    with tf1.control_dependencies([tf1.assign_add(policy.global_step, 1)]):
         return tf.group(actor_op, critic_op)
 
 
@@ -341,7 +341,7 @@ def build_ddpg_stats(policy, batch):
 
 def before_init_fn(policy, obs_space, action_space, config):
     # Create global step for counting the number of update operations.
-    policy.global_step = tf.train.get_or_create_global_step()
+    policy.global_step = tf1.train.get_or_create_global_step()
 
 
 class ComputeTDErrorMixin:
diff --git a/rllib/agents/ddpg/ddpg_torch_policy.py b/rllib/agents/ddpg/ddpg_torch_policy.py
index b3f49f102..b8ad9f801 100644
--- a/rllib/agents/ddpg/ddpg_torch_policy.py
+++ b/rllib/agents/ddpg/ddpg_torch_policy.py
@@ -49,10 +49,10 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
     target_model_out_tp1, _ = policy.target_model(input_dict_next, [], None)
 
     # Policy network evaluation.
-    # prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
+    # prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS))
     policy_t = model.get_policy_output(model_out_t)
     # policy_batchnorm_update_ops = list(
-    #    set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
+    #    set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
 
     policy_tp1 = \
         policy.target_model.get_policy_output(target_model_out_tp1)
@@ -73,7 +73,7 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
         policy_tp1_smoothed = policy_tp1
 
     # Q-net(s) evaluation.
-    # prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
+    # prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS))
     # Q-values for given actions & observations in given current
     q_t = model.get_q_values(model_out_t, train_batch[SampleBatch.ACTIONS])
 
@@ -86,7 +86,7 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
         twin_q_t = model.get_twin_q_values(model_out_t,
                                            train_batch[SampleBatch.ACTIONS])
     # q_batchnorm_update_ops = list(
-    #     set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
+    #     set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
 
     # Target q-net(s) evaluation.
     q_tp1 = policy.target_model.get_q_values(target_model_out_tp1,
diff --git a/rllib/agents/ddpg/noop_model.py b/rllib/agents/ddpg/noop_model.py
index 8da8af4ed..4dba83b9d 100644
--- a/rllib/agents/ddpg/noop_model.py
+++ b/rllib/agents/ddpg/noop_model.py
@@ -4,7 +4,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+_, tf, _ = try_import_tf()
 
 
 class NoopModel(TFModelV2):
diff --git a/rllib/agents/ddpg/tests/test_td3.py b/rllib/agents/ddpg/tests/test_td3.py
index 80dfe92d4..1c0356278 100644
--- a/rllib/agents/ddpg/tests/test_td3.py
+++ b/rllib/agents/ddpg/tests/test_td3.py
@@ -6,7 +6,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check, check_compute_single_action, \
     framework_iterator
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class TestTD3(unittest.TestCase):
@@ -32,8 +32,9 @@ class TestTD3(unittest.TestCase):
 
         # Test against all frameworks.
         for _ in framework_iterator(config, frameworks="tf"):
+            lcl_config = config.copy()
             # Default GaussianNoise setup.
-            trainer = td3.TD3Trainer(config=config, env="Pendulum-v0")
+            trainer = td3.TD3Trainer(config=lcl_config, env="Pendulum-v0")
             # Setting explore=False should always return the same action.
             a_ = trainer.compute_action(obs, explore=False)
             for _ in range(50):
@@ -44,9 +45,10 @@ class TestTD3(unittest.TestCase):
             for _ in range(50):
                 actions.append(trainer.compute_action(obs))
             check(np.std(actions), 0.0, false=True)
+            trainer.stop()
 
             # Check randomness at beginning.
-            config["exploration_config"] = {
+            lcl_config["exploration_config"] = {
                 # Act randomly at beginning ...
                 "random_timesteps": 30,
                 # Then act very closely to deterministic actions thereafter.
@@ -54,7 +56,7 @@ class TestTD3(unittest.TestCase):
                 "initial_scale": 0.001,
                 "final_scale": 0.001,
             }
-            trainer = td3.TD3Trainer(config=config, env="Pendulum-v0")
+            trainer = td3.TD3Trainer(config=lcl_config, env="Pendulum-v0")
             # ts=1 (get a deterministic action as per explore=False).
             deterministic_action = trainer.compute_action(obs, explore=False)
             # ts=2-5 (in random window).
@@ -73,6 +75,7 @@ class TestTD3(unittest.TestCase):
             for _ in range(50):
                 a = trainer.compute_action(obs, explore=False)
                 check(a, deterministic_action)
+            trainer.stop()
 
 
 if __name__ == "__main__":
diff --git a/rllib/agents/dqn/distributional_q_tf_model.py b/rllib/agents/dqn/distributional_q_tf_model.py
index c3e936f8e..cc30e2d9d 100644
--- a/rllib/agents/dqn/distributional_q_tf_model.py
+++ b/rllib/agents/dqn/distributional_q_tf_model.py
@@ -3,7 +3,7 @@ import numpy as np
 from ray.rllib.models.tf.tf_modelv2 import TFModelV2
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class DistributionalQTFModel(TFModelV2):
@@ -155,7 +155,7 @@ class DistributionalQTFModel(TFModelV2):
                     units=num_atoms, activation=None)(state_out)
             return state_score
 
-        if tf.executing_eagerly():
+        if tf1.executing_eagerly():
             from tensorflow.python.ops import variable_scope
             # Have to use a variable store to reuse variables in eager mode
             store = variable_scope.EagerVariableStore()
@@ -163,30 +163,32 @@ class DistributionalQTFModel(TFModelV2):
             # Save the scope objects, since in eager we will execute this
             # path repeatedly and there is no guarantee it will always be run
             # in the same original scope.
-            with tf.variable_scope(name + "/action_value") as action_scope:
+            with tf1.variable_scope(name + "/action_value") as action_scope:
                 pass
-            with tf.variable_scope(name + "/state_value") as state_scope:
+            with tf1.variable_scope(name + "/state_value") as state_scope:
                 pass
 
             def build_action_value_in_scope(model_out):
                 with store.as_default():
-                    with tf.variable_scope(action_scope, reuse=tf.AUTO_REUSE):
+                    with tf1.variable_scope(
+                            action_scope, reuse=tf1.AUTO_REUSE):
                         return build_action_value(model_out)
 
             def build_state_score_in_scope(model_out):
                 with store.as_default():
-                    with tf.variable_scope(state_scope, reuse=tf.AUTO_REUSE):
+                    with tf1.variable_scope(
+                            state_scope, reuse=tf1.AUTO_REUSE):
                         return build_state_score(model_out)
         else:
 
             def build_action_value_in_scope(model_out):
-                with tf.variable_scope(
-                        name + "/action_value", reuse=tf.AUTO_REUSE):
+                with tf1.variable_scope(
+                        name + "/action_value", reuse=tf1.AUTO_REUSE):
                     return build_action_value(model_out)
 
             def build_state_score_in_scope(model_out):
-                with tf.variable_scope(
-                        name + "/state_value", reuse=tf.AUTO_REUSE):
+                with tf1.variable_scope(
+                        name + "/state_value", reuse=tf1.AUTO_REUSE):
                     return build_state_score(model_out)
 
         q_out = build_action_value_in_scope(self.model_out)
@@ -241,33 +243,33 @@ class DistributionalQTFModel(TFModelV2):
         epsilon_w = tf.matmul(
             a=tf.expand_dims(epsilon_in, -1), b=tf.expand_dims(epsilon_out, 0))
         epsilon_b = epsilon_out
-        sigma_w = tf.get_variable(
+        sigma_w = tf1.get_variable(
             name=prefix + "_sigma_w",
             shape=[in_size, out_size],
             dtype=tf.float32,
-            initializer=tf.random_uniform_initializer(
+            initializer=tf1.random_uniform_initializer(
                 minval=-1.0 / np.sqrt(float(in_size)),
                 maxval=1.0 / np.sqrt(float(in_size))))
         # TF noise generation can be unreliable on GPU
         # If generating the noise on the CPU,
         # lowering sigma0 to 0.1 may be helpful
-        sigma_b = tf.get_variable(
+        sigma_b = tf1.get_variable(
             name=prefix + "_sigma_b",
             shape=[out_size],
             dtype=tf.float32,  # 0.5~GPU, 0.1~CPU
-            initializer=tf.constant_initializer(
+            initializer=tf1.constant_initializer(
                 sigma0 / np.sqrt(float(in_size))))
 
-        w = tf.get_variable(
+        w = tf1.get_variable(
             name=prefix + "_fc_w",
             shape=[in_size, out_size],
             dtype=tf.float32,
-            initializer=tf.initializers.glorot_uniform())
-        b = tf.get_variable(
+            initializer=tf.initializers.GlorotUniform())
+        b = tf1.get_variable(
             name=prefix + "_fc_b",
             shape=[out_size],
             dtype=tf.float32,
-            initializer=tf.zeros_initializer())
+            initializer=tf.initializers.Zeros())
 
         action_activation = \
             tf.keras.layers.Lambda(lambda x: tf.matmul(
diff --git a/rllib/agents/dqn/dqn_tf_policy.py b/rllib/agents/dqn/dqn_tf_policy.py
index c5e13bf5e..7faaf6da9 100644
--- a/rllib/agents/dqn/dqn_tf_policy.py
+++ b/rllib/agents/dqn/dqn_tf_policy.py
@@ -17,7 +17,7 @@ from ray.rllib.utils.tf_ops import huber_loss, reduce_mean_ignore_inf, \
     minimize_and_clip
 from ray.rllib.utils.tf_ops import make_tf_callable
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 Q_SCOPE = "q_func"
 Q_TARGET_SCOPE = "target_q_func"
@@ -253,7 +253,7 @@ def build_q_losses(policy, model, _, train_batch):
 
 
 def adam_optimizer(policy, config):
-    return tf.train.AdamOptimizer(
+    return tf1.train.AdamOptimizer(
         learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"])
 
 
diff --git a/rllib/agents/dqn/simple_q_model.py b/rllib/agents/dqn/simple_q_model.py
index 432071775..54eee6000 100644
--- a/rllib/agents/dqn/simple_q_model.py
+++ b/rllib/agents/dqn/simple_q_model.py
@@ -1,7 +1,7 @@
 from ray.rllib.models.tf.tf_modelv2 import TFModelV2
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class SimpleQModel(TFModelV2):
diff --git a/rllib/agents/dqn/simple_q_tf_policy.py b/rllib/agents/dqn/simple_q_tf_policy.py
index a9879c434..c6a70615b 100644
--- a/rllib/agents/dqn/simple_q_tf_policy.py
+++ b/rllib/agents/dqn/simple_q_tf_policy.py
@@ -15,7 +15,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.tf_ops import huber_loss, make_tf_callable
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 logger = logging.getLogger(__name__)
 
 Q_SCOPE = "q_func"
diff --git a/rllib/agents/dqn/tests/test_dqn.py b/rllib/agents/dqn/tests/test_dqn.py
index 491103c3a..287b46c77 100644
--- a/rllib/agents/dqn/tests/test_dqn.py
+++ b/rllib/agents/dqn/tests/test_dqn.py
@@ -7,7 +7,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check, check_compute_single_action, \
     framework_iterator
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class TestDQN(unittest.TestCase):
diff --git a/rllib/agents/dqn/tests/test_simple_q.py b/rllib/agents/dqn/tests/test_simple_q.py
index 9039cbcc6..057d6350c 100644
--- a/rllib/agents/dqn/tests/test_simple_q.py
+++ b/rllib/agents/dqn/tests/test_simple_q.py
@@ -11,7 +11,7 @@ from ray.rllib.utils.numpy import fc, one_hot, huber_loss
 from ray.rllib.utils.test_utils import check, check_compute_single_action, \
     framework_iterator
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class TestSimpleQ(unittest.TestCase):
diff --git a/rllib/agents/es/es_tf_policy.py b/rllib/agents/es/es_tf_policy.py
index 73964179d..c739a906c 100644
--- a/rllib/agents/es/es_tf_policy.py
+++ b/rllib/agents/es/es_tf_policy.py
@@ -14,7 +14,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space, \
     unbatch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 tree = try_import_tree()
 
 
@@ -60,9 +60,9 @@ def rollout(policy, env, timestep_limit=None, add_noise=False, offset=0.0):
 
 def make_session(single_threaded):
     if not single_threaded:
-        return tf.Session()
-    return tf.Session(
-        config=tf.ConfigProto(
+        return tf1.Session()
+    return tf1.Session(
+        config=tf1.ConfigProto(
             inter_op_parallelism_threads=1, intra_op_parallelism_threads=1))
 
 
@@ -77,8 +77,8 @@ class ESTFPolicy:
                                              self.preprocessor.shape)
         self.single_threaded = config.get("single_threaded", False)
         self.sess = make_session(single_threaded=self.single_threaded)
-        self.inputs = tf.placeholder(tf.float32,
-                                     [None] + list(self.preprocessor.shape))
+        self.inputs = tf1.placeholder(tf.float32,
+                                      [None] + list(self.preprocessor.shape))
 
         # Policy network.
         dist_class, dist_dim = ModelCatalog.get_action_dist(
@@ -98,7 +98,7 @@ class ESTFPolicy:
         self.num_params = sum(
             np.prod(variable.shape.as_list())
             for _, variable in self.variables.variables.items())
-        self.sess.run(tf.global_variables_initializer())
+        self.sess.run(tf1.global_variables_initializer())
 
     def compute_actions(self,
                         observation,
diff --git a/rllib/agents/impala/tests/test_impala.py b/rllib/agents/impala/tests/test_impala.py
index 7089a2281..e9885ac66 100644
--- a/rllib/agents/impala/tests/test_impala.py
+++ b/rllib/agents/impala/tests/test_impala.py
@@ -6,7 +6,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check_compute_single_action, \
     framework_iterator
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class TestIMPALA(unittest.TestCase):
diff --git a/rllib/agents/impala/tests/test_vtrace.py b/rllib/agents/impala/tests/test_vtrace.py
index b0277c036..53d0d42a5 100644
--- a/rllib/agents/impala/tests/test_vtrace.py
+++ b/rllib/agents/impala/tests/test_vtrace.py
@@ -30,7 +30,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.utils.numpy import softmax
 from ray.rllib.utils.test_utils import check, framework_iterator
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
@@ -185,20 +185,20 @@ class VtraceTest(unittest.TestCase):
                 # can deal with that.
                 inputs_ = {
                     # T, B, NUM_ACTIONS
-                    "behaviour_policy_logits": tf.placeholder(
+                    "behaviour_policy_logits": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None, None]),
                     # T, B, NUM_ACTIONS
-                    "target_policy_logits": tf.placeholder(
+                    "target_policy_logits": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None, None]),
-                    "actions": tf.placeholder(
+                    "actions": tf1.placeholder(
                         dtype=tf.int32, shape=[None, None]),
-                    "discounts": tf.placeholder(
+                    "discounts": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None]),
-                    "rewards": tf.placeholder(
+                    "rewards": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None]),
-                    "values": tf.placeholder(
+                    "values": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None]),
-                    "bootstrap_value": tf.placeholder(
+                    "bootstrap_value": tf1.placeholder(
                         dtype=tf.float32, shape=[None]),
                 }
             else:
@@ -282,15 +282,15 @@ class VtraceTest(unittest.TestCase):
             vtrace = vtrace_tf if fw != "torch" else vtrace_torch
             if fw == "tf":
                 inputs_ = {
-                    "log_rhos": tf.placeholder(
+                    "log_rhos": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None, 1]),
-                    "discounts": tf.placeholder(
+                    "discounts": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None, 1]),
-                    "rewards": tf.placeholder(
+                    "rewards": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None, 42]),
-                    "values": tf.placeholder(
+                    "values": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None, 42]),
-                    "bootstrap_value": tf.placeholder(
+                    "bootstrap_value": tf1.placeholder(
                         dtype=tf.float32, shape=[None, 42])
                 }
             else:
@@ -310,16 +310,16 @@ class VtraceTest(unittest.TestCase):
             vtrace = vtrace_tf if fw != "torch" else vtrace_torch
             if fw == "tf":
                 inputs_ = {
-                    "log_rhos": tf.placeholder(
+                    "log_rhos": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None, 1]),
-                    "discounts": tf.placeholder(
+                    "discounts": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None, 1]),
-                    "rewards": tf.placeholder(
+                    "rewards": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None, 42]),
-                    "values": tf.placeholder(
+                    "values": tf1.placeholder(
                         dtype=tf.float32, shape=[None, None, 42]),
                     # Should be [None, 42].
-                    "bootstrap_value": tf.placeholder(
+                    "bootstrap_value": tf1.placeholder(
                         dtype=tf.float32, shape=[None])
                 }
             else:
diff --git a/rllib/agents/impala/vtrace_tf.py b/rllib/agents/impala/vtrace_tf.py
index aa6ab5c7a..fb612c57e 100644
--- a/rllib/agents/impala/vtrace_tf.py
+++ b/rllib/agents/impala/vtrace_tf.py
@@ -33,7 +33,7 @@ import collections
 from ray.rllib.models.tf.tf_action_dist import Categorical
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 VTraceFromLogitsReturns = collections.namedtuple("VTraceFromLogitsReturns", [
     "vs", "pg_advantages", "log_rhos", "behaviour_action_log_probs",
@@ -222,7 +222,7 @@ def multi_from_logits(behaviour_policy_logits,
         behaviour_policy_logits[i].shape.assert_has_rank(3)
         target_policy_logits[i].shape.assert_has_rank(3)
 
-    with tf.name_scope(
+    with tf1.name_scope(
             name,
             values=[
                 behaviour_policy_logits, target_policy_logits, actions,
@@ -332,21 +332,22 @@ def from_importance_weights(log_rhos,
     if clip_pg_rho_threshold is not None:
         clip_pg_rho_threshold.shape.assert_has_rank(0)
 
-    with tf.name_scope(
+    with tf1.name_scope(
             name,
             values=[log_rhos, discounts, rewards, values, bootstrap_value]):
-        rhos = tf.exp(log_rhos)
+        rhos = tf.math.exp(log_rhos)
         if clip_rho_threshold is not None:
             clipped_rhos = tf.minimum(
                 clip_rho_threshold, rhos, name="clipped_rhos")
 
-            tf.summary.histogram("clipped_rhos_1000", tf.minimum(1000.0, rhos))
-            tf.summary.scalar(
+            tf1.summary.histogram(
+                    "clipped_rhos_1000", tf.minimum(1000.0, rhos))
+            tf1.summary.scalar(
                 "num_of_clipped_rhos",
                 tf.reduce_sum(
                     tf.cast(
                         tf.equal(clipped_rhos, clip_rho_threshold), tf.int32)))
-            tf.summary.scalar("size_of_clipped_rhos", tf.size(clipped_rhos))
+            tf1.summary.scalar("size_of_clipped_rhos", tf.size(clipped_rhos))
         else:
             clipped_rhos = rhos
 
diff --git a/rllib/agents/impala/vtrace_tf_policy.py b/rllib/agents/impala/vtrace_tf_policy.py
index a8baf9bbf..0237772bc 100644
--- a/rllib/agents/impala/vtrace_tf_policy.py
+++ b/rllib/agents/impala/vtrace_tf_policy.py
@@ -16,7 +16,7 @@ from ray.rllib.policy.tf_policy import LearningRateSchedule, \
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.tf_ops import explained_variance
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -253,10 +253,11 @@ def postprocess_trajectory(policy,
 
 def choose_optimizer(policy, config):
     if policy.config["opt_type"] == "adam":
-        return tf.train.AdamOptimizer(policy.cur_lr)
+        return tf1.train.AdamOptimizer(policy.cur_lr)
     else:
-        return tf.train.RMSPropOptimizer(policy.cur_lr, config["decay"],
-                                         config["momentum"], config["epsilon"])
+        return tf1.train.RMSPropOptimizer(
+            policy.cur_lr,
+            config["decay"], config["momentum"], config["epsilon"])
 
 
 def clip_gradients(policy, optimizer, loss):
diff --git a/rllib/agents/maml/maml_tf_policy.py b/rllib/agents/maml/maml_tf_policy.py
index 541cab675..f4a0a9ca5 100644
--- a/rllib/agents/maml/maml_tf_policy.py
+++ b/rllib/agents/maml/maml_tf_policy.py
@@ -9,7 +9,7 @@ from ray.rllib.agents.ppo.ppo_tf_policy import postprocess_ppo_gae, \
     vf_preds_fetches, clip_gradients, setup_config, ValueNetworkMixin
 from ray.rllib.utils.framework import get_activation_fn
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +33,7 @@ def PPOLoss(dist_class,
         pi_new_logp = curr_dist.logp(actions)
         pi_old_logp = prev_dist.logp(actions)
 
-        logp_ratio = tf.exp(pi_new_logp - pi_old_logp)
+        logp_ratio = tf.math.exp(pi_new_logp - pi_old_logp)
         if clip_loss:
             return tf.minimum(
                 advantages * logp_ratio,
@@ -49,10 +49,10 @@ def PPOLoss(dist_class,
 
     def vf_loss(value_fn, value_targets, vf_preds, vf_clip_param=0.1):
         # GAE Value Function Loss
-        vf_loss1 = tf.square(value_fn - value_targets)
+        vf_loss1 = tf.math.square(value_fn - value_targets)
         vf_clipped = vf_preds + tf.clip_by_value(value_fn - vf_preds,
                                                  -vf_clip_param, vf_clip_param)
-        vf_loss2 = tf.square(vf_clipped - value_targets)
+        vf_loss2 = tf.math.square(vf_clipped - value_targets)
         vf_loss = tf.maximum(vf_loss1, vf_loss2)
         return vf_loss
 
@@ -104,7 +104,7 @@ class WorkerLoss(object):
             vf_clip_param=vf_clip_param,
             vf_loss_coeff=vf_loss_coeff,
             clip_loss=clip_loss)
-        self.loss = tf.Print(self.loss, ["Worker Adapt Loss", self.loss])
+        self.loss = tf1.Print(self.loss, ["Worker Adapt Loss", self.loss])
 
 
 # This is the Meta-Update computation graph for main (meta-update step)
@@ -230,7 +230,7 @@ class MAMLLoss(object):
             tf.multiply(self.cur_kl_coeff, mean_inner_kl))
         self.loss = tf.reduce_mean(tf.stack(ppo_obj,
                                             axis=0)) + self.inner_kl_loss
-        self.loss = tf.Print(
+        self.loss = tf1.Print(
             self.loss,
             ["Meta-Loss", self.loss, "Inner KL", self.mean_inner_kl])
 
@@ -309,7 +309,7 @@ class MAMLLoss(object):
 def maml_loss(policy, model, dist_class, train_batch):
     logits, state = model.from_batch(train_batch)
 
-    policy._loss_input_dict["split"] = tf.placeholder(
+    policy._loss_input_dict["split"] = tf1.placeholder(
         tf.int32,
         name="Meta-Update-Splitting",
         shape=(policy.config["inner_adaptation_steps"] + 1,
@@ -333,8 +333,8 @@ def maml_loss(policy, model, dist_class, train_batch):
             vf_loss_coeff=policy.config["vf_loss_coeff"],
             clip_loss=False)
     else:
-        policy.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
-                                            tf.get_variable_scope().name)
+        policy.var_list = tf1.get_collection(tf1.GraphKeys.TRAINABLE_VARIABLES,
+                                             tf1.get_variable_scope().name)
         policy.loss_obj = MAMLLoss(
             model=model,
             dist_class=dist_class,
@@ -380,8 +380,8 @@ class KLCoeffMixin:
         self.kl_coeff_val = [config["kl_coeff"]
                              ] * config["inner_adaptation_steps"]
         self.kl_target = self.config["kl_target"]
-        self.kl_coeff = tf.get_variable(
-            initializer=tf.constant_initializer(self.kl_coeff_val),
+        self.kl_coeff = tf1.get_variable(
+            initializer=tf.keras.initializers.Constant(self.kl_coeff_val),
             name="kl_coeff",
             shape=(config["inner_adaptation_steps"]),
             trainable=False,
@@ -404,8 +404,8 @@ def maml_optimizer_fn(policy, config):
     Meta-Policy uses Adam optimizer for meta-update
     """
     if not config["worker_index"]:
-        return tf.train.AdamOptimizer(learning_rate=config["lr"])
-    return tf.train.GradientDescentOptimizer(learning_rate=config["inner_lr"])
+        return tf1.train.AdamOptimizer(learning_rate=config["lr"])
+    return tf1.train.GradientDescentOptimizer(learning_rate=config["inner_lr"])
 
 
 def setup_mixins(policy, obs_space, action_space, config):
diff --git a/rllib/agents/marwil/marwil_tf_policy.py b/rllib/agents/marwil/marwil_tf_policy.py
index 947142f1d..cb00f88c0 100644
--- a/rllib/agents/marwil/marwil_tf_policy.py
+++ b/rllib/agents/marwil/marwil_tf_policy.py
@@ -6,7 +6,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class ValueNetworkMixin:
@@ -37,13 +37,13 @@ class ReweightedImitationLoss:
         # advantage estimation
         adv = cumulative_rewards - state_values
         # update averaged advantage norm
-        update_adv_norm = tf.assign_add(
+        update_adv_norm = tf1.assign_add(
             ref=policy._ma_adv_norm,
             value=1e-6 * (
                     tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
 
         # exponentially weighted advantages
-        with tf.control_dependencies([update_adv_norm]):
+        with tf1.control_dependencies([update_adv_norm]):
             exp_advs = tf.math.exp(beta * tf.math.divide(
                 adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
 
@@ -125,7 +125,7 @@ def setup_mixins(policy, obs_space, action_space, config):
     ValueNetworkMixin.__init__(policy)
     # Set up a tf-var for the moving avg (do this here to make it work with
     # eager mode).
-    policy._ma_adv_norm = tf.get_variable(
+    policy._ma_adv_norm = tf1.get_variable(
         name="moving_average_of_advantage_norm",
         dtype=tf.float32,
         initializer=100.0,
diff --git a/rllib/agents/marwil/tests/test_marwil.py b/rllib/agents/marwil/tests/test_marwil.py
index bc49f39ea..fa6a9a98d 100644
--- a/rllib/agents/marwil/tests/test_marwil.py
+++ b/rllib/agents/marwil/tests/test_marwil.py
@@ -6,7 +6,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check_compute_single_action, \
     framework_iterator
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class TestMARWIL(unittest.TestCase):
diff --git a/rllib/agents/pg/pg_tf_policy.py b/rllib/agents/pg/pg_tf_policy.py
index 8f937a8de..88ccc2ac7 100644
--- a/rllib/agents/pg/pg_tf_policy.py
+++ b/rllib/agents/pg/pg_tf_policy.py
@@ -5,7 +5,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy
 from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 def post_process_advantages(policy,
diff --git a/rllib/agents/ppo/appo_tf_policy.py b/rllib/agents/ppo/appo_tf_policy.py
index c733890ec..da908966c 100644
--- a/rllib/agents/ppo/appo_tf_policy.py
+++ b/rllib/agents/ppo/appo_tf_policy.py
@@ -21,7 +21,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 POLICY_SCOPE = "func"
 TARGET_POLICY_SCOPE = "target_func"
@@ -65,7 +65,7 @@ class PPOSurrogateLoss:
         def reduce_mean_valid(t):
             return tf.reduce_mean(tf.boolean_mask(t, valid_mask))
 
-        logp_ratio = tf.exp(actions_logp - prev_actions_logp)
+        logp_ratio = tf.math.exp(actions_logp - prev_actions_logp)
 
         surrogate_loss = tf.minimum(
             advantages * logp_ratio,
@@ -170,7 +170,7 @@ class VTraceSurrogateLoss:
                                               tf.float32))
 
         self.is_ratio = tf.clip_by_value(
-            tf.exp(prev_actions_logp - old_policy_actions_logp), 0.0, 2.0)
+            tf.math.exp(prev_actions_logp - old_policy_actions_logp), 0.0, 2.0)
         logp_ratio = self.is_ratio * tf.exp(actions_logp - prev_actions_logp)
 
         advantages = self.vtrace_returns.pg_advantages
diff --git a/rllib/agents/ppo/ppo.py b/rllib/agents/ppo/ppo.py
index 082a69696..62aa1ba7b 100644
--- a/rllib/agents/ppo/ppo.py
+++ b/rllib/agents/ppo/ppo.py
@@ -7,9 +7,6 @@ from ray.rllib.execution.rollout_ops import ParallelRollouts, ConcatBatches, \
     StandardizeFields, SelectExperiences
 from ray.rllib.execution.train_ops import TrainOneStep, TrainTFMultiGPU
 from ray.rllib.execution.metric_ops import StandardMetricsReporting
-from ray.rllib.utils.framework import try_import_tf
-
-tf = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
diff --git a/rllib/agents/ppo/ppo_tf_policy.py b/rllib/agents/ppo/ppo_tf_policy.py
index 963d4d816..f5af4281f 100644
--- a/rllib/agents/ppo/ppo_tf_policy.py
+++ b/rllib/agents/ppo/ppo_tf_policy.py
@@ -10,7 +10,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -174,7 +174,7 @@ def postprocess_ppo_gae(policy,
     else:
         next_state = []
         for i in range(policy.num_state_tensors()):
-            next_state.append([sample_batch["state_out_{}".format(i)][-1]])
+            next_state.append(sample_batch["state_out_{}".format(i)][-1])
         last_r = policy._value(sample_batch[SampleBatch.NEXT_OBS][-1],
                                sample_batch[SampleBatch.ACTIONS][-1],
                                sample_batch[SampleBatch.REWARDS][-1],
@@ -206,7 +206,7 @@ class KLCoeffMixin:
         # KL Coefficient
         self.kl_coeff_val = config["kl_coeff"]
         self.kl_target = config["kl_target"]
-        self.kl_coeff = tf.get_variable(
+        self.kl_coeff = tf1.get_variable(
             initializer=tf.constant_initializer(self.kl_coeff_val),
             name="kl_coeff",
             shape=(),
diff --git a/rllib/agents/ppo/ppo_torch_policy.py b/rllib/agents/ppo/ppo_torch_policy.py
index f94d18d6a..2d2f8e1fc 100644
--- a/rllib/agents/ppo/ppo_torch_policy.py
+++ b/rllib/agents/ppo/ppo_torch_policy.py
@@ -194,7 +194,7 @@ class ValueNetworkMixin:
                     SampleBatch.PREV_REWARDS: convert_to_torch_tensor(
                         np.asarray([prev_reward])),
                     "is_training": False,
-                }, [convert_to_torch_tensor(np.asarray(s)) for s in state],
+                }, [convert_to_torch_tensor(np.asarray([s])) for s in state],
                     convert_to_torch_tensor(np.asarray([1])))
                 return self.model.value_function()[0]
 
diff --git a/rllib/agents/ppo/tests/test_appo.py b/rllib/agents/ppo/tests/test_appo.py
index de21398fc..7161bb83a 100644
--- a/rllib/agents/ppo/tests/test_appo.py
+++ b/rllib/agents/ppo/tests/test_appo.py
@@ -2,12 +2,9 @@ import unittest
 
 import ray
 import ray.rllib.agents.ppo as ppo
-from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check_compute_single_action, \
     framework_iterator
 
-tf = try_import_tf()
-
 
 class TestAPPO(unittest.TestCase):
     @classmethod
diff --git a/rllib/agents/ppo/tests/test_ddppo.py b/rllib/agents/ppo/tests/test_ddppo.py
index 25cd56c27..4c56a22ee 100644
--- a/rllib/agents/ppo/tests/test_ddppo.py
+++ b/rllib/agents/ppo/tests/test_ddppo.py
@@ -2,12 +2,9 @@ import unittest
 
 import ray
 import ray.rllib.agents.ppo as ppo
-from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check_compute_single_action, \
     framework_iterator
 
-tf = try_import_tf()
-
 
 class TestDDPPO(unittest.TestCase):
     @classmethod
diff --git a/rllib/agents/ppo/tests/test_ppo.py b/rllib/agents/ppo/tests/test_ppo.py
index 472689b3b..b1dec4e5a 100644
--- a/rllib/agents/ppo/tests/test_ppo.py
+++ b/rllib/agents/ppo/tests/test_ppo.py
@@ -13,12 +13,10 @@ from ray.rllib.models.tf.tf_action_dist import Categorical
 from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
 from ray.rllib.models.torch.torch_action_dist import TorchCategorical
 from ray.rllib.policy.sample_batch import SampleBatch
-from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.numpy import fc
 from ray.rllib.utils.test_utils import check, framework_iterator, \
     check_compute_single_action
 
-tf = try_import_tf()
 
 # Fake CartPole episode of n time steps.
 FAKE_BATCH = {
@@ -40,7 +38,7 @@ FAKE_BATCH = {
 class TestPPO(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        ray.init()
+        ray.init(local_mode=True)
 
     @classmethod
     def tearDownClass(cls):
diff --git a/rllib/agents/sac/sac_tf_model.py b/rllib/agents/sac/sac_tf_model.py
index f505ad382..e2ac33f0b 100644
--- a/rllib/agents/sac/sac_tf_model.py
+++ b/rllib/agents/sac/sac_tf_model.py
@@ -4,7 +4,7 @@ import numpy as np
 from ray.rllib.models.tf.tf_modelv2 import TFModelV2
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class SACTFModel(TFModelV2):
diff --git a/rllib/agents/sac/sac_tf_policy.py b/rllib/agents/sac/sac_tf_policy.py
index b68f96ee8..49076ac48 100644
--- a/rllib/agents/sac/sac_tf_policy.py
+++ b/rllib/agents/sac/sac_tf_policy.py
@@ -17,7 +17,7 @@ from ray.rllib.utils.error import UnsupportedSpaceException
 from ray.rllib.utils.framework import try_import_tf, try_import_tfp
 from ray.rllib.utils.tf_ops import minimize_and_clip
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 tfp = try_import_tfp()
 
 logger = logging.getLogger(__name__)
@@ -138,10 +138,10 @@ def sac_actor_critic_loss(policy, model, _, train_batch):
     if model.discrete:
         # Get all action probs directly from pi and form their logp.
         log_pis_t = tf.nn.log_softmax(model.get_policy_output(model_out_t), -1)
-        policy_t = tf.exp(log_pis_t)
+        policy_t = tf.math.exp(log_pis_t)
         log_pis_tp1 = tf.nn.log_softmax(
             model.get_policy_output(model_out_tp1), -1)
-        policy_tp1 = tf.exp(log_pis_tp1)
+        policy_tp1 = tf.math.exp(log_pis_tp1)
         # Q-values.
         q_t = model.get_q_values(model_out_t)
         # Target Q-values.
@@ -219,20 +219,20 @@ def sac_actor_critic_loss(policy, model, _, train_batch):
         policy.config["gamma"]**policy.config["n_step"] * q_tp1_best_masked)
 
     # Compute the TD-error (potentially clipped).
-    base_td_error = tf.abs(q_t_selected - q_t_selected_target)
+    base_td_error = tf.math.abs(q_t_selected - q_t_selected_target)
     if policy.config["twin_q"]:
-        twin_td_error = tf.abs(twin_q_t_selected - q_t_selected_target)
+        twin_td_error = tf.math.abs(twin_q_t_selected - q_t_selected_target)
         td_error = 0.5 * (base_td_error + twin_td_error)
     else:
         td_error = base_td_error
 
     critic_loss = [
-        tf.losses.mean_squared_error(
+        tf1.losses.mean_squared_error(
             labels=q_t_selected_target, predictions=q_t_selected, weights=0.5)
     ]
     if policy.config["twin_q"]:
         critic_loss.append(
-            tf.losses.mean_squared_error(
+            tf1.losses.mean_squared_error(
                 labels=q_t_selected_target,
                 predictions=twin_q_t_selected,
                 weights=0.5))
@@ -274,7 +274,7 @@ def sac_actor_critic_loss(policy, model, _, train_batch):
 
     # in a custom apply op we handle the losses separately, but return them
     # combined in one loss for now
-    return actor_loss + tf.add_n(critic_loss) + alpha_loss
+    return actor_loss + tf.math.add_n(critic_loss) + alpha_loss
 
 
 def gradients(policy, optimizer, loss):
@@ -358,7 +358,7 @@ def apply_gradients(policy, optimizer, grads_and_vars):
 
     alpha_apply_ops = policy._alpha_optimizer.apply_gradients(
         policy._alpha_grads_and_vars,
-        global_step=tf.train.get_or_create_global_step())
+        global_step=tf1.train.get_or_create_global_step())
     return tf.group([actor_apply_ops, alpha_apply_ops] + critic_apply_ops)
 
 
@@ -381,20 +381,20 @@ def stats(policy, train_batch):
 class ActorCriticOptimizerMixin:
     def __init__(self, config):
         # create global step for counting the number of update operations
-        self.global_step = tf.train.get_or_create_global_step()
+        self.global_step = tf1.train.get_or_create_global_step()
 
         # use separate optimizers for actor & critic
-        self._actor_optimizer = tf.train.AdamOptimizer(
+        self._actor_optimizer = tf1.train.AdamOptimizer(
             learning_rate=config["optimization"]["actor_learning_rate"])
         self._critic_optimizer = [
-            tf.train.AdamOptimizer(
+            tf1.train.AdamOptimizer(
                 learning_rate=config["optimization"]["critic_learning_rate"])
         ]
         if config["twin_q"]:
             self._critic_optimizer.append(
-                tf.train.AdamOptimizer(learning_rate=config["optimization"][
+                tf1.train.AdamOptimizer(learning_rate=config["optimization"][
                     "critic_learning_rate"]))
-        self._alpha_optimizer = tf.train.AdamOptimizer(
+        self._alpha_optimizer = tf1.train.AdamOptimizer(
             learning_rate=config["optimization"]["entropy_learning_rate"])
 
 
diff --git a/rllib/agents/sac/tests/test_sac.py b/rllib/agents/sac/tests/test_sac.py
index 134127d62..b941b974a 100644
--- a/rllib/agents/sac/tests/test_sac.py
+++ b/rllib/agents/sac/tests/test_sac.py
@@ -11,13 +11,12 @@ from ray.rllib.models.tf.tf_action_dist import SquashedGaussian
 from ray.rllib.models.torch.torch_action_dist import TorchSquashedGaussian
 from ray.rllib.execution.replay_buffer import LocalReplayBuffer
 from ray.rllib.policy.sample_batch import SampleBatch
-from ray.rllib.utils.framework import try_import_tf, try_import_torch
+from ray.rllib.utils.framework import try_import_torch
 from ray.rllib.utils.numpy import fc, relu
 from ray.rllib.utils.test_utils import check, check_compute_single_action, \
     framework_iterator
 from ray.rllib.utils.torch_ops import convert_to_torch_tensor
 
-tf = try_import_tf()
 torch, _ = try_import_torch()
 
 
diff --git a/rllib/agents/trainer.py b/rllib/agents/trainer.py
index 6a99bc9c7..74f272f87 100644
--- a/rllib/agents/trainer.py
+++ b/rllib/agents/trainer.py
@@ -35,7 +35,7 @@ from ray.tune.resources import Resources
 from ray.tune.logger import Logger, UnifiedLogger
 from ray.tune.result import DEFAULT_RESULTS_DIR
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -595,12 +595,12 @@ class Trainer(Trainable):
             self.config.pop("eager")
 
         # Enable eager/tracing support.
-        if tf and self.config["framework"] == "tfe":
-            if not tf.executing_eagerly():
-                tf.enable_eager_execution()
+        if tf1 and self.config["framework"] == "tfe":
+            if not tf1.executing_eagerly():
+                tf1.enable_eager_execution()
             logger.info("Executing eagerly, with eager_tracing={}".format(
                 self.config["eager_tracing"]))
-        if tf and not tf.executing_eagerly() and \
+        if tf1 and not tf1.executing_eagerly() and \
                 self.config["framework"] != "torch":
             logger.info("Tip: set framework=tfe or the --eager flag to enable "
                         "TensorFlow eager execution")
@@ -634,8 +634,8 @@ class Trainer(Trainable):
             logging.getLogger("ray.rllib").setLevel(self.config["log_level"])
 
         def get_scope():
-            if tf and not tf.executing_eagerly():
-                return tf.Graph().as_default()
+            if tf1 and not tf1.executing_eagerly():
+                return tf1.Graph().as_default()
             else:
                 return open(os.devnull)  # fake a no-op scope
 
diff --git a/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py b/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py
index fa0345455..e5bd8c825 100644
--- a/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py
+++ b/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py
@@ -12,14 +12,13 @@ from ray.rllib.execution.metric_ops import StandardMetricsReporting
 from ray.rllib.models.catalog import ModelCatalog
 from ray.rllib.models.model import restore_original_dimensions
 from ray.rllib.models.torch.torch_action_dist import TorchCategorical
-from ray.rllib.utils.framework import try_import_tf, try_import_torch
+from ray.rllib.utils.framework import try_import_torch
 from ray.tune.registry import ENV_CREATOR, _global_registry
 
 from ray.rllib.contrib.alpha_zero.core.alpha_zero_policy import AlphaZeroPolicy
 from ray.rllib.contrib.alpha_zero.core.mcts import MCTS
 from ray.rllib.contrib.alpha_zero.core.ranked_rewards import get_r2_env_wrapper
 
-tf = try_import_tf()
 torch, nn = try_import_torch()
 
 logger = logging.getLogger(__name__)
diff --git a/rllib/contrib/maddpg/maddpg_policy.py b/rllib/contrib/maddpg/maddpg_policy.py
index 2d86dffe0..e6977b6e7 100644
--- a/rllib/contrib/maddpg/maddpg_policy.py
+++ b/rllib/contrib/maddpg/maddpg_policy.py
@@ -15,7 +15,7 @@ import numpy as np
 
 logger = logging.getLogger(__name__)
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 tfp = try_import_tfp()
 
 
@@ -49,7 +49,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
         # _____ Initial Configuration
         config = dict(ray.rllib.contrib.maddpg.DEFAULT_CONFIG, **config)
         self.config = config
-        self.global_step = tf.train.get_or_create_global_step()
+        self.global_step = tf1.train.get_or_create_global_step()
 
         # FIXME: Get done from info is required since agentwise done is not
         # supported now.
@@ -88,7 +88,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
         # Placeholders for policy evaluation and updates
         def _make_ph_n(space_n, name=""):
             return [
-                tf.placeholder(
+                tf1.placeholder(
                     tf.float32,
                     shape=(None, ) + space.shape,
                     name=name + "_%d" % i) for i, space in enumerate(space_n)
@@ -98,9 +98,9 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
         act_ph_n = _make_ph_n(act_space_n, "actions")
         new_obs_ph_n = _make_ph_n(obs_space_n, "new_obs")
         new_act_ph_n = _make_ph_n(act_space_n, "new_actions")
-        rew_ph = tf.placeholder(
+        rew_ph = tf1.placeholder(
             tf.float32, shape=None, name="rewards_{}".format(agent_id))
-        done_ph = tf.placeholder(
+        done_ph = tf1.placeholder(
             tf.float32, shape=None, name="dones_{}".format(agent_id))
 
         if config["use_local_critic"]:
@@ -190,12 +190,12 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
 
         # _____ Optimizers
         self.optimizers = {
-            "critic": tf.train.AdamOptimizer(config["critic_lr"]),
-            "actor": tf.train.AdamOptimizer(config["actor_lr"])
+            "critic": tf1.train.AdamOptimizer(config["critic_lr"]),
+            "actor": tf1.train.AdamOptimizer(config["actor_lr"])
         }
 
         # _____ Build variable update ops.
-        self.tau = tf.placeholder_with_default(
+        self.tau = tf1.placeholder_with_default(
             config["tau"], shape=(), name="tau")
 
         def _make_target_update_op(vs, target_vs, tau):
@@ -213,7 +213,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
             for v in variables.values():
                 vs += v
             phs = [
-                tf.placeholder(
+                tf1.placeholder(
                     tf.float32,
                     shape=v.get_shape(),
                     name=v.name.split(":")[0] + "_ph") for v in vs
@@ -230,7 +230,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
 
         # _____ TensorFlow Initialization
 
-        self.sess = tf.get_default_session()
+        self.sess = tf1.get_default_session()
 
         def _make_loss_inputs(placeholders):
             return [(ph.name.split("/")[-1].split(":")[0], ph)
@@ -251,7 +251,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
             loss_inputs=loss_inputs,
             dist_inputs=actor_feature)
 
-        self.sess.run(tf.global_variables_initializer())
+        self.sess.run(tf1.global_variables_initializer())
 
         # Hard initial update
         self.update_target(1.0)
@@ -280,8 +280,8 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
         critic_apply_op = self.optimizers["critic"].apply_gradients(
             self.gvs["critic"])
 
-        with tf.control_dependencies([tf.assign_add(self.global_step, 1)]):
-            with tf.control_dependencies([critic_apply_op]):
+        with tf1.control_dependencies([tf1.assign_add(self.global_step, 1)]):
+            with tf1.control_dependencies([critic_apply_op]):
                 actor_apply_op = self.optimizers["actor"].apply_gradients(
                     self.gvs["actor"])
 
@@ -324,7 +324,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
                               hiddens,
                               activation=None,
                               scope=None):
-        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope:
+        with tf1.variable_scope(scope, reuse=tf1.AUTO_REUSE) as scope:
             if use_state_preprocessor:
                 model_n = [
                     ModelCatalog.get_model({
@@ -341,11 +341,12 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
                 out = tf.concat(obs_n + act_n, axis=1)
 
             for hidden in hiddens:
-                out = tf.layers.dense(out, units=hidden, activation=activation)
+                out = tf1.layers.dense(
+                    out, units=hidden, activation=activation)
             feature = out
-            out = tf.layers.dense(feature, units=1, activation=None)
+            out = tf1.layers.dense(feature, units=1, activation=None)
 
-        return out, feature, model_n, tf.global_variables(scope.name)
+        return out, feature, model_n, tf1.global_variables(scope.name)
 
     def _build_actor_network(self,
                              obs,
@@ -355,7 +356,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
                              hiddens,
                              activation=None,
                              scope=None):
-        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope:
+        with tf1.variable_scope(scope, reuse=tf1.AUTO_REUSE) as scope:
             if use_state_preprocessor:
                 model = ModelCatalog.get_model({
                     "obs": obs,
@@ -367,13 +368,14 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
                 out = obs
 
             for hidden in hiddens:
-                out = tf.layers.dense(out, units=hidden, activation=activation)
-            feature = tf.layers.dense(
+                out = tf1.layers.dense(
+                    out, units=hidden, activation=activation)
+            feature = tf1.layers.dense(
                 out, units=act_space.shape[0], activation=None)
             sampler = tfp.distributions.RelaxedOneHotCategorical(
                 temperature=1.0, logits=feature).sample()
 
-        return sampler, feature, model, tf.global_variables(scope.name)
+        return sampler, feature, model, tf1.global_variables(scope.name)
 
     def update_target(self, tau=None):
         if tau is not None:
diff --git a/rllib/evaluation/rollout_worker.py b/rllib/evaluation/rollout_worker.py
index d61e91373..157249976 100644
--- a/rllib/evaluation/rollout_worker.py
+++ b/rllib/evaluation/rollout_worker.py
@@ -50,7 +50,7 @@ if TYPE_CHECKING:
 # Generic type var for foreach_* methods.
 T = TypeVar("T")
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 logger = logging.getLogger(__name__)
@@ -283,12 +283,12 @@ class RolloutWorker(ParallelIteratorWorker):
         ParallelIteratorWorker.__init__(self, gen_rollouts, False)
 
         policy_config: TrainerConfigDict = policy_config or {}
-        if (tf and policy_config.get("framework") == "tfe"
+        if (tf1 and policy_config.get("framework") == "tfe"
                 and not policy_config.get("no_eager_on_workers")
                 # This eager check is necessary for certain all-framework tests
                 # that use tf's eager_mode() context generator.
-                and not tf.executing_eagerly()):
-            tf.enable_eager_execution()
+                and not tf1.executing_eagerly()):
+            tf1.enable_eager_execution()
 
         if log_level:
             logging.getLogger("ray.rllib").setLevel(log_level)
@@ -382,21 +382,21 @@ class RolloutWorker(ParallelIteratorWorker):
                 torch.manual_seed(seed)
             except AssertionError:
                 logger.info("Could not seed torch")
-        if _has_tensorflow_graph(policy_dict) and not (tf and
-                                                       tf.executing_eagerly()):
-            if not tf:
+        if _has_tensorflow_graph(policy_dict) and not (
+                tf1 and tf1.executing_eagerly()):
+            if not tf1:
                 raise ImportError("Could not import tensorflow")
-            with tf.Graph().as_default():
+            with tf1.Graph().as_default():
                 if tf_session_creator:
                     self.tf_sess = tf_session_creator()
                 else:
-                    self.tf_sess = tf.Session(
-                        config=tf.ConfigProto(
-                            gpu_options=tf.GPUOptions(allow_growth=True)))
+                    self.tf_sess = tf1.Session(
+                        config=tf1.ConfigProto(
+                            gpu_options=tf1.GPUOptions(allow_growth=True)))
                 with self.tf_sess.as_default():
                     # set graph-level seed
                     if seed is not None:
-                        tf.set_random_seed(seed)
+                        tf1.set_random_seed(seed)
                     self.policy_map, self.preprocessors = \
                         self._build_policy_map(policy_dict, policy_config)
             if (ray.is_initialized()
@@ -406,7 +406,7 @@ class RolloutWorker(ParallelIteratorWorker):
                         "Creating policy evaluation worker {}".format(
                             worker_index) +
                         " on CPU (please ignore any CUDA init errors)")
-                elif not tf.test.is_gpu_available():
+                elif not tf1.test.is_gpu_available():
                     raise RuntimeError(
                         "GPUs were assigned to this worker by Ray, but "
                         "TensorFlow reports GPU acceleration is disabled. "
@@ -956,7 +956,7 @@ class RolloutWorker(ParallelIteratorWorker):
                     "Found raw Tuple|Dict space as input to policy. "
                     "Please preprocess these observations with a "
                     "Tuple|DictFlatteningPreprocessor.")
-            if tf and tf.executing_eagerly():
+            if tf1 and tf1.executing_eagerly():
                 if hasattr(cls, "as_eager"):
                     cls = cls.as_eager()
                     if policy_config["eager_tracing"]:
@@ -966,8 +966,8 @@ class RolloutWorker(ParallelIteratorWorker):
                 else:
                     raise ValueError("This policy does not support eager "
                                      "execution: {}".format(cls))
-            if tf:
-                with tf.variable_scope(name):
+            if tf1:
+                with tf1.variable_scope(name):
                     policy_map[name] = cls(obs_space, act_space, merged_conf)
             else:
                 policy_map[name] = cls(obs_space, act_space, merged_conf)
diff --git a/rllib/evaluation/worker_set.py b/rllib/evaluation/worker_set.py
index ed79b6444..46f6a86ff 100644
--- a/rllib/evaluation/worker_set.py
+++ b/rllib/evaluation/worker_set.py
@@ -14,7 +14,7 @@ from ray.rllib.utils import merge_dicts
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.types import PolicyID, TrainerConfigDict, EnvType
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -202,8 +202,8 @@ class WorkerSet:
         def session_creator():
             logger.debug("Creating TF session {}".format(
                 config["tf_session_args"]))
-            return tf.Session(
-                config=tf.ConfigProto(**config["tf_session_args"]))
+            return tf1.Session(
+                config=tf1.ConfigProto(**config["tf_session_args"]))
 
         if isinstance(config["input"], FunctionType):
             input_creator = config["input"]
diff --git a/rllib/examples/attention_net.py b/rllib/examples/attention_net.py
index 02c8d96b8..8a4089dd8 100644
--- a/rllib/examples/attention_net.py
+++ b/rllib/examples/attention_net.py
@@ -11,7 +11,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check_learning_achieved
 from ray.tune import registry
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--run", type=str, default="PPO")
diff --git a/rllib/examples/attention_net_supervised.py b/rllib/examples/attention_net_supervised.py
index 642bed2db..0282a6195 100644
--- a/rllib/examples/attention_net_supervised.py
+++ b/rllib/examples/attention_net_supervised.py
@@ -4,7 +4,7 @@ import numpy as np
 from rllib.models.tf.attention_net import TrXLNet
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 def bit_shift_generator(seq_length, shift, batch_size):
diff --git a/rllib/examples/batch_norm_model.py b/rllib/examples/batch_norm_model.py
index fa41a0add..5159a166f 100644
--- a/rllib/examples/batch_norm_model.py
+++ b/rllib/examples/batch_norm_model.py
@@ -10,7 +10,7 @@ from ray.rllib.models import ModelCatalog
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check_learning_achieved
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--run", type=str, default="PPO")
diff --git a/rllib/examples/centralized_critic.py b/rllib/examples/centralized_critic.py
index 260d8494e..42c086dc1 100644
--- a/rllib/examples/centralized_critic.py
+++ b/rllib/examples/centralized_critic.py
@@ -39,7 +39,7 @@ from ray.rllib.utils.test_utils import check_learning_achieved
 from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
 from ray.rllib.utils.torch_ops import convert_to_torch_tensor
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 OPPONENT_OBS = "opponent_obs"
diff --git a/rllib/examples/custom_env.py b/rllib/examples/custom_env.py
index 8963f4882..bde0f473c 100644
--- a/rllib/examples/custom_env.py
+++ b/rllib/examples/custom_env.py
@@ -23,7 +23,7 @@ from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.utils.test_utils import check_learning_achieved
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 parser = argparse.ArgumentParser()
diff --git a/rllib/examples/custom_keras_model.py b/rllib/examples/custom_keras_model.py
index aac7e41d4..9a9279670 100644
--- a/rllib/examples/custom_keras_model.py
+++ b/rllib/examples/custom_keras_model.py
@@ -12,7 +12,7 @@ from ray.rllib.models.tf.tf_modelv2 import TFModelV2
 from ray.rllib.models.tf.visionnet import VisionNetwork as MyVisionNetwork
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--run", type=str, default="DQN")  # Try PG, PPO, DQN
diff --git a/rllib/examples/custom_loss.py b/rllib/examples/custom_loss.py
index 9d3d90348..0f47632d9 100644
--- a/rllib/examples/custom_loss.py
+++ b/rllib/examples/custom_loss.py
@@ -21,7 +21,7 @@ from ray.rllib.examples.models.custom_loss_model import CustomLossModel, \
 from ray.rllib.models import ModelCatalog
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--torch", action="store_true")
diff --git a/rllib/examples/custom_tf_policy.py b/rllib/examples/custom_tf_policy.py
index e2a919273..6e1a968e3 100644
--- a/rllib/examples/custom_tf_policy.py
+++ b/rllib/examples/custom_tf_policy.py
@@ -7,7 +7,7 @@ from ray.rllib.evaluation.postprocessing import discount
 from ray.rllib.policy.tf_policy_template import build_tf_policy
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--stop-iters", type=int, default=200)
diff --git a/rllib/examples/eager_execution.py b/rllib/examples/eager_execution.py
index f3f05328e..fb603b23b 100644
--- a/rllib/examples/eager_execution.py
+++ b/rllib/examples/eager_execution.py
@@ -11,7 +11,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check_learning_achieved
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--stop-iters", type=int, default=200)
diff --git a/rllib/examples/export/cartpole_dqn_export.py b/rllib/examples/export/cartpole_dqn_export.py
index 46ab741a9..8b315dd79 100644
--- a/rllib/examples/export/cartpole_dqn_export.py
+++ b/rllib/examples/export/cartpole_dqn_export.py
@@ -6,7 +6,7 @@ import ray
 from ray.rllib.agents.registry import get_agent_class
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 ray.init(num_cpus=10)
 
@@ -25,14 +25,14 @@ def train_and_export(algo_name, num_steps, model_dir, ckpt_dir, prefix):
 
 def restore_saved_model(export_dir):
     signature_key = \
-        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-    g = tf.Graph()
+        tf1.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+    g = tf1.Graph()
     with g.as_default():
-        with tf.Session(graph=g) as sess:
+        with tf1.Session(graph=g) as sess:
             meta_graph_def = \
-                tf.saved_model.load(sess,
-                                    [tf.saved_model.tag_constants.SERVING],
-                                    export_dir)
+                tf1.saved_model.load(sess,
+                                     [tf1.saved_model.tag_constants.SERVING],
+                                     export_dir)
             print("Model restored!")
             print("Signature Def Information:")
             print(meta_graph_def.signature_def[signature_key])
@@ -41,13 +41,13 @@ def restore_saved_model(export_dir):
 
 
 def restore_checkpoint(export_dir, prefix):
-    sess = tf.Session()
+    sess = tf1.Session()
     meta_file = "%s.meta" % prefix
-    saver = tf.train.import_meta_graph(os.path.join(export_dir, meta_file))
+    saver = tf1.train.import_meta_graph(os.path.join(export_dir, meta_file))
     saver.restore(sess, os.path.join(export_dir, prefix))
     print("Checkpoint restored!")
     print("Variables Information:")
-    for v in tf.trainable_variables():
+    for v in tf1.trainable_variables():
         value = sess.run(v)
         print(v.name, value)
 
diff --git a/rllib/examples/mobilenet_v2_with_lstm.py b/rllib/examples/mobilenet_v2_with_lstm.py
index e0f066a13..4f8ba7edc 100644
--- a/rllib/examples/mobilenet_v2_with_lstm.py
+++ b/rllib/examples/mobilenet_v2_with_lstm.py
@@ -13,7 +13,7 @@ from ray.rllib.examples.models.mobilenet_v2_with_lstm_models import \
 from ray.rllib.models import ModelCatalog
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 cnn_shape = (4, 4, 3)
 # The torch version of MobileNetV2 does channels first.
diff --git a/rllib/examples/models/autoregressive_action_dist.py b/rllib/examples/models/autoregressive_action_dist.py
index 5385e0b15..929a7d782 100644
--- a/rllib/examples/models/autoregressive_action_dist.py
+++ b/rllib/examples/models/autoregressive_action_dist.py
@@ -3,7 +3,7 @@ from ray.rllib.models.torch.torch_action_dist import TorchCategorical, \
     TorchDistributionWrapper
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
diff --git a/rllib/examples/models/autoregressive_action_model.py b/rllib/examples/models/autoregressive_action_model.py
index 06f32f78a..5602f9b52 100644
--- a/rllib/examples/models/autoregressive_action_model.py
+++ b/rllib/examples/models/autoregressive_action_model.py
@@ -7,7 +7,7 @@ from ray.rllib.models.torch.misc import SlimFC
 from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
diff --git a/rllib/examples/models/batch_norm_model.py b/rllib/examples/models/batch_norm_model.py
index 762793de2..5091415ec 100644
--- a/rllib/examples/models/batch_norm_model.py
+++ b/rllib/examples/models/batch_norm_model.py
@@ -9,7 +9,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
@@ -39,27 +39,27 @@ class BatchNormModel(TFModelV2):
     def forward(self, input_dict, state, seq_lens):
         last_layer = input_dict["obs"]
         hiddens = [256, 256]
-        with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
+        with tf1.variable_scope("model", reuse=tf1.AUTO_REUSE):
             for i, size in enumerate(hiddens):
-                last_layer = tf.layers.dense(
+                last_layer = tf1.layers.dense(
                     last_layer,
                     size,
                     kernel_initializer=normc_initializer(1.0),
                     activation=tf.nn.tanh,
                     name="fc{}".format(i))
                 # Add a batch norm layer
-                last_layer = tf.layers.batch_normalization(
+                last_layer = tf1.layers.batch_normalization(
                     last_layer,
                     training=input_dict["is_training"],
                     name="bn_{}".format(i))
 
-            output = tf.layers.dense(
+            output = tf1.layers.dense(
                 last_layer,
                 self.num_outputs,
                 kernel_initializer=normc_initializer(0.01),
                 activation=None,
                 name="out")
-            self._value_out = tf.layers.dense(
+            self._value_out = tf1.layers.dense(
                 last_layer,
                 1,
                 kernel_initializer=normc_initializer(1.0),
@@ -67,8 +67,8 @@ class BatchNormModel(TFModelV2):
                 name="vf")
         if not self._registered:
             self.register_variables(
-                tf.get_collection(
-                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+"))
+                tf1.get_collection(
+                    tf1.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+"))
             self._registered = True
 
         return output, []
diff --git a/rllib/examples/models/centralized_critic_models.py b/rllib/examples/models/centralized_critic_models.py
index 030ab66fe..276f42381 100644
--- a/rllib/examples/models/centralized_critic_models.py
+++ b/rllib/examples/models/centralized_critic_models.py
@@ -9,7 +9,7 @@ from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
diff --git a/rllib/examples/models/custom_loss_model.py b/rllib/examples/models/custom_loss_model.py
index 0a536c7d2..a0fa41c2b 100644
--- a/rllib/examples/models/custom_loss_model.py
+++ b/rllib/examples/models/custom_loss_model.py
@@ -10,7 +10,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.offline import JsonReader
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
@@ -73,7 +73,7 @@ class DeprecatedCustomLossModelV1(Model):
 
     def _build_layers_v2(self, input_dict, num_outputs, options):
         self.obs_in = input_dict["obs"]
-        with tf.variable_scope("shared", reuse=tf.AUTO_REUSE):
+        with tf1.variable_scope("shared", reuse=tf1.AUTO_REUSE):
             self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space,
                                                self.action_space, num_outputs,
                                                options)
diff --git a/rllib/examples/models/eager_model.py b/rllib/examples/models/eager_model.py
index 6e2d44c04..a20236711 100644
--- a/rllib/examples/models/eager_model.py
+++ b/rllib/examples/models/eager_model.py
@@ -6,7 +6,7 @@ from ray.rllib.models.tf.tf_modelv2 import TFModelV2
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class EagerModel(TFModelV2):
@@ -34,7 +34,7 @@ class EagerModel(TFModelV2):
 
         def lambda_(x):
             eager_out = tf.py_function(self.forward_eager, [x], tf.float32)
-            with tf.control_dependencies([eager_out]):
+            with tf1.control_dependencies([eager_out]):
                 eager_out.set_shape(x.shape)
                 return eager_out
 
diff --git a/rllib/examples/models/fast_model.py b/rllib/examples/models/fast_model.py
index 7e6528db7..c377608d3 100644
--- a/rllib/examples/models/fast_model.py
+++ b/rllib/examples/models/fast_model.py
@@ -5,7 +5,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
@@ -25,11 +25,11 @@ class FastModel(TFModelV2):
 
     @override(ModelV2)
     def forward(self, input_dict, state, seq_lens):
-        with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
-            bias = tf.get_variable(
+        with tf1.variable_scope("model", reuse=tf1.AUTO_REUSE):
+            bias = tf1.get_variable(
                 dtype=tf.float32,
                 name="bias",
-                initializer=tf.zeros_initializer,
+                initializer=tf.keras.initializers.Zeros(),
                 shape=())
             output = bias + \
                 tf.zeros([tf.shape(input_dict["obs"])[0], self.num_outputs])
@@ -37,8 +37,8 @@ class FastModel(TFModelV2):
 
         if not self._registered:
             self.register_variables(
-                tf.get_collection(
-                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+"))
+                tf1.get_collection(
+                    tf1.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+"))
             self._registered = True
 
         return output, []
diff --git a/rllib/examples/models/mobilenet_v2_with_lstm_models.py b/rllib/examples/models/mobilenet_v2_with_lstm_models.py
index 3bc7052be..5b0aa8248 100644
--- a/rllib/examples/models/mobilenet_v2_with_lstm_models.py
+++ b/rllib/examples/models/mobilenet_v2_with_lstm_models.py
@@ -7,7 +7,7 @@ from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
diff --git a/rllib/examples/models/parametric_actions_model.py b/rllib/examples/models/parametric_actions_model.py
index f0c62935d..225399286 100644
--- a/rllib/examples/models/parametric_actions_model.py
+++ b/rllib/examples/models/parametric_actions_model.py
@@ -9,7 +9,7 @@ from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.utils.numpy import LARGE_INTEGER
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
diff --git a/rllib/examples/models/rnn_model.py b/rllib/examples/models/rnn_model.py
index 55d6f940b..4b3d3db9e 100644
--- a/rllib/examples/models/rnn_model.py
+++ b/rllib/examples/models/rnn_model.py
@@ -7,7 +7,7 @@ from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
diff --git a/rllib/examples/models/rnn_spy_model.py b/rllib/examples/models/rnn_spy_model.py
index 18f06f202..1b1d95f1e 100644
--- a/rllib/examples/models/rnn_spy_model.py
+++ b/rllib/examples/models/rnn_spy_model.py
@@ -8,13 +8,15 @@ from ray.rllib.models.tf.recurrent_net import RecurrentNetwork
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class SpyLayer(tf.keras.layers.Layer):
     """A keras Layer, which intercepts its inputs and stored them as pickled.
     """
 
+    output = np.array(0, dtype=np.int64)
+
     def __init__(self, num_outputs, **kwargs):
         super().__init__(**kwargs)
 
@@ -26,7 +28,7 @@ class SpyLayer(tf.keras.layers.Layer):
         """
 
         del kwargs
-        spy_fn = tf.py_func(
+        spy_fn = tf1.py_func(
             self.spy,
             [
                 inputs[0],  # observations
@@ -36,11 +38,11 @@ class SpyLayer(tf.keras.layers.Layer):
                 inputs[5],  # h_out
                 inputs[6],  # c_out
             ],
-            tf.int64,
+            tf.int64,  # Must match the dtype of SpyLayer.output.
             stateful=True)
 
         # Compute outputs
-        with tf.control_dependencies([spy_fn]):
+        with tf1.control_dependencies([spy_fn]):
             return self.dense(inputs[1])
 
     @staticmethod
@@ -48,7 +50,8 @@ class SpyLayer(tf.keras.layers.Layer):
         """The actual spy operation: Store inputs in internal_kv."""
 
         if len(inputs) == 1:
-            return 0  # don't capture inference inputs
+            # Don't capture inference inputs.
+            return SpyLayer.output
         # TF runs this function in an isolated context, so we have to use
         # redis to communicate back to our suite
         ray.experimental.internal_kv._internal_kv_put(
@@ -61,7 +64,7 @@ class SpyLayer(tf.keras.layers.Layer):
             }),
             overwrite=True)
         RNNSpyModel.capture_index += 1
-        return 0
+        return SpyLayer.output
 
 
 class RNNSpyModel(RecurrentNetwork):
diff --git a/rllib/examples/models/shared_weights_model.py b/rllib/examples/models/shared_weights_model.py
index 137396a2f..24e9a041f 100644
--- a/rllib/examples/models/shared_weights_model.py
+++ b/rllib/examples/models/shared_weights_model.py
@@ -7,7 +7,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
@@ -15,7 +15,7 @@ class SharedWeightsModel1(TFModelV2):
     """Example of weight sharing between two different TFModelV2s.
 
     Here, we share the variables defined in the 'shared' variable scope
-    by entering it explicitly with tf.AUTO_REUSE. This creates the
+    by entering it explicitly with tf1.AUTO_REUSE. This creates the
     variables for the 'fc1' layer in a global scope called 'shared'
     (outside of the Policy's normal variable scope).
     """
@@ -26,9 +26,9 @@ class SharedWeightsModel1(TFModelV2):
                          model_config, name)
 
         inputs = tf.keras.layers.Input(observation_space.shape)
-        with tf.variable_scope(
-                tf.VariableScope(tf.AUTO_REUSE, "shared"),
-                reuse=tf.AUTO_REUSE,
+        with tf1.variable_scope(
+                tf1.VariableScope(tf1.AUTO_REUSE, "shared"),
+                reuse=tf1.AUTO_REUSE,
                 auxiliary_name_scope=False):
             last_layer = tf.keras.layers.Dense(
                 units=64, activation=tf.nn.relu, name="fc1")(inputs)
@@ -60,9 +60,9 @@ class SharedWeightsModel2(TFModelV2):
         inputs = tf.keras.layers.Input(observation_space.shape)
 
         # Weights shared with SharedWeightsModel1.
-        with tf.variable_scope(
-                tf.VariableScope(tf.AUTO_REUSE, "shared"),
-                reuse=tf.AUTO_REUSE,
+        with tf1.variable_scope(
+                tf1.VariableScope(tf1.AUTO_REUSE, "shared"),
+                reuse=tf1.AUTO_REUSE,
                 auxiliary_name_scope=False):
             last_layer = tf.keras.layers.Dense(
                 units=64, activation=tf.nn.relu, name="fc1")(inputs)
diff --git a/rllib/examples/models/simple_rpg_model.py b/rllib/examples/models/simple_rpg_model.py
index b77428745..6126ea688 100644
--- a/rllib/examples/models/simple_rpg_model.py
+++ b/rllib/examples/models/simple_rpg_model.py
@@ -4,7 +4,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
 from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFCNet
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
diff --git a/rllib/examples/multi_agent_cartpole.py b/rllib/examples/multi_agent_cartpole.py
index c7b41c36b..64cde0f5c 100644
--- a/rllib/examples/multi_agent_cartpole.py
+++ b/rllib/examples/multi_agent_cartpole.py
@@ -22,7 +22,7 @@ from ray.rllib.models import ModelCatalog
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check_learning_achieved
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 parser = argparse.ArgumentParser()
 
diff --git a/rllib/examples/rock_paper_scissors_multiagent.py b/rllib/examples/rock_paper_scissors_multiagent.py
index 97e3ec4e5..f9a22a596 100644
--- a/rllib/examples/rock_paper_scissors_multiagent.py
+++ b/rllib/examples/rock_paper_scissors_multiagent.py
@@ -20,7 +20,7 @@ from ray.rllib.examples.policy.rock_paper_scissors_dummies import \
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.utils.test_utils import check_learning_achieved
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 parser = argparse.ArgumentParser()
diff --git a/rllib/execution/multi_gpu_impl.py b/rllib/execution/multi_gpu_impl.py
index 0771bb18b..010ee477c 100644
--- a/rllib/execution/multi_gpu_impl.py
+++ b/rllib/execution/multi_gpu_impl.py
@@ -5,7 +5,7 @@ from ray.util.debug import log_once
 from ray.rllib.utils.debug import summarize
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 # Variable scope in which created variables will be placed under
 TOWER_SCOPE_NAME = "tower"
@@ -26,7 +26,7 @@ class LocalSyncParallelOptimizer:
     `load_data`, so you can make multiple passes (possibly in randomized order)
     over the same data once loaded.
 
-    This is similar to tf.train.SyncReplicasOptimizer, but works within a
+    This is similar to tf1.train.SyncReplicasOptimizer, but works within a
     single TensorFlow graph, i.e. implements in-graph replicated training:
 
       https://www.tensorflow.org/api_docs/python/tf/train/SyncReplicasOptimizer
@@ -63,21 +63,21 @@ class LocalSyncParallelOptimizer:
         self.build_graph = build_graph
 
         # First initialize the shared loss network
-        with tf.name_scope(TOWER_SCOPE_NAME):
+        with tf1.name_scope(TOWER_SCOPE_NAME):
             self._shared_loss = build_graph(self.loss_inputs)
-        shared_ops = tf.get_collection(
-            tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
+        shared_ops = tf1.get_collection(
+            tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name)
 
         # Then setup the per-device loss graphs that use the shared weights
-        self._batch_index = tf.placeholder(tf.int32, name="batch_index")
+        self._batch_index = tf1.placeholder(tf.int32, name="batch_index")
 
         # Dynamic batch size, which may be shrunk if there isn't enough data
-        self._per_device_batch_size = tf.placeholder(
+        self._per_device_batch_size = tf1.placeholder(
             tf.int32, name="per_device_batch_size")
         self._loaded_per_device_batch_size = max_per_device_batch_size
 
         # When loading RNN input, we dynamically determine the max seq len
-        self._max_seq_len = tf.placeholder(tf.int32, name="max_seq_len")
+        self._max_seq_len = tf1.placeholder(tf.int32, name="max_seq_len")
         self._loaded_max_seq_len = 1
 
         # Split on the CPU in case the data doesn't fit in GPU memory.
@@ -103,15 +103,15 @@ class LocalSyncParallelOptimizer:
         # gather update ops for any batch norm layers. TODO(ekl) here we will
         # use all the ops found which won't work for DQN / DDPG, but those
         # aren't supported with multi-gpu right now anyways.
-        self._update_ops = tf.get_collection(
-            tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
+        self._update_ops = tf1.get_collection(
+            tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name)
         for op in shared_ops:
             self._update_ops.remove(op)  # only care about tower update ops
         if self._update_ops:
             logger.debug("Update ops to run on apply gradient: {}".format(
                 self._update_ops))
 
-        with tf.control_dependencies(self._update_ops):
+        with tf1.control_dependencies(self._update_ops):
             self._train_op = self.optimizer.apply_gradients(avg)
 
     def load_data(self, sess, inputs, state_inputs):
@@ -265,11 +265,11 @@ class LocalSyncParallelOptimizer:
     def _setup_device(self, device, device_input_placeholders, num_data_in):
         assert num_data_in <= len(device_input_placeholders)
         with tf.device(device):
-            with tf.name_scope(TOWER_SCOPE_NAME):
+            with tf1.name_scope(TOWER_SCOPE_NAME):
                 device_input_batches = []
                 device_input_slices = []
                 for i, ph in enumerate(device_input_placeholders):
-                    current_batch = tf.Variable(
+                    current_batch = tf1.Variable(
                         ph,
                         trainable=False,
                         validate_shape=False,
diff --git a/rllib/execution/multi_gpu_learner.py b/rllib/execution/multi_gpu_learner.py
index 5d2d2c220..4e1b2a342 100644
--- a/rllib/execution/multi_gpu_learner.py
+++ b/rllib/execution/multi_gpu_learner.py
@@ -13,7 +13,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.timer import TimerStat
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -84,14 +84,15 @@ class TFMultiGPULearner(LearnerThread):
         self.par_opt = []
         with self.local_worker.tf_sess.graph.as_default():
             with self.local_worker.tf_sess.as_default():
-                with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
+                with tf1.variable_scope(
+                        DEFAULT_POLICY_ID, reuse=tf1.AUTO_REUSE):
                     if self.policy._state_inputs:
                         rnn_inputs = self.policy._state_inputs + [
                             self.policy._seq_lens
                         ]
                     else:
                         rnn_inputs = []
-                    adam = tf.train.AdamOptimizer(self.lr)
+                    adam = tf1.train.AdamOptimizer(self.lr)
                     for _ in range(num_data_loader_buffers):
                         self.par_opt.append(
                             LocalSyncParallelOptimizer(
@@ -103,7 +104,7 @@ class TFMultiGPULearner(LearnerThread):
                                 self.policy.copy))
 
                 self.sess = self.local_worker.tf_sess
-                self.sess.run(tf.global_variables_initializer())
+                self.sess.run(tf1.global_variables_initializer())
 
         self.idle_optimizers = queue.Queue()
         self.ready_optimizers = queue.Queue()
diff --git a/rllib/execution/train_ops.py b/rllib/execution/train_ops.py
index f4e794740..05cb942c7 100644
--- a/rllib/execution/train_ops.py
+++ b/rllib/execution/train_ops.py
@@ -20,7 +20,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.sgd import do_minibatch_sgd, averaged
 from ray.rllib.utils.types import PolicyID, SampleBatchType
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -137,7 +137,7 @@ class TrainTFMultiGPU:
             with self.workers.local_worker().tf_sess.as_default():
                 for policy_id in self.policies:
                     policy = self.workers.local_worker().get_policy(policy_id)
-                    with tf.variable_scope(policy_id, reuse=tf.AUTO_REUSE):
+                    with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE):
                         if policy._state_inputs:
                             rnn_inputs = policy._state_inputs + [
                                 policy._seq_lens
@@ -152,7 +152,7 @@ class TrainTFMultiGPU:
                                 self.per_device_batch_size, policy.copy))
 
                 self.sess = self.workers.local_worker().tf_sess
-                self.sess.run(tf.global_variables_initializer())
+                self.sess.run(tf1.global_variables_initializer())
 
     def __call__(self,
                  samples: SampleBatchType) -> (SampleBatchType, List[dict]):
diff --git a/rllib/models/catalog.py b/rllib/models/catalog.py
index 652c1e65e..8992f8065 100644
--- a/rllib/models/catalog.py
+++ b/rllib/models/catalog.py
@@ -27,7 +27,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.spaces.simplex import Simplex
 from ray.rllib.utils.spaces.space_utils import flatten_space
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 tree = try_import_tree()
 
 logger = logging.getLogger(__name__)
@@ -257,7 +257,7 @@ class ModelCatalog:
 
         dtype, shape = ModelCatalog.get_action_shape(action_space)
 
-        return tf.placeholder(dtype, shape=shape, name=name)
+        return tf1.placeholder(dtype, shape=shape, name=name)
 
     @staticmethod
     @DeveloperAPI
diff --git a/rllib/models/model.py b/rllib/models/model.py
index ffb56c5eb..8ff85633f 100644
--- a/rllib/models/model.py
+++ b/rllib/models/model.py
@@ -8,7 +8,7 @@ from ray.rllib.utils.annotations import PublicAPI, DeveloperAPI
 from ray.rllib.utils.deprecation import deprecation_warning
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 logger = logging.getLogger(__name__)
@@ -38,13 +38,13 @@ class Model:
         self.action_space = action_space
         self.num_outputs = num_outputs
         self.options = options
-        self.scope = tf.get_variable_scope()
-        self.session = tf.get_default_session()
+        self.scope = tf1.get_variable_scope()
+        self.session = tf1.get_default_session()
         self.input_dict = input_dict
         if seq_lens is not None:
             self.seq_lens = seq_lens
         else:
-            self.seq_lens = tf.placeholder(
+            self.seq_lens = tf1.placeholder(
                 dtype=tf.int32, shape=[None], name="seq_lens")
 
         self._num_outputs = num_outputs
@@ -68,10 +68,10 @@ class Model:
                 input_dict["obs"], num_outputs, options)
 
         if options.get("free_log_std", False):
-            log_std = tf.get_variable(
+            log_std = tf1.get_variable(
                 name="log_std",
                 shape=[num_outputs],
-                initializer=tf.zeros_initializer)
+                initializer=tf1.zeros_initializer)
             self.outputs = tf.concat(
                 [self.outputs, 0.0 * self.outputs + log_std], 1)
 
@@ -196,7 +196,7 @@ class Model:
 def flatten(obs, framework):
     """Flatten the given tensor."""
     if framework == "tf":
-        return tf.layers.flatten(obs)
+        return tf1.layers.flatten(obs)
     elif framework == "torch":
         assert torch is not None
         return torch.flatten(obs, start_dim=1)
diff --git a/rllib/models/modelv2.py b/rllib/models/modelv2.py
index 25e070b99..7247d119f 100644
--- a/rllib/models/modelv2.py
+++ b/rllib/models/modelv2.py
@@ -13,7 +13,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch, \
 from ray.rllib.utils.spaces.repeated import Repeated
 from ray.rllib.utils.types import ModelConfigDict
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 
@@ -339,7 +339,7 @@ class NullContextManager:
 def flatten(obs, framework):
     """Flatten the given tensor."""
     if framework == "tf":
-        return tf.layers.flatten(obs)
+        return tf1.layers.flatten(obs)
     elif framework == "torch":
         assert torch is not None
         return torch.flatten(obs, start_dim=1)
diff --git a/rllib/models/tests/test_torch_modules.py b/rllib/models/tests/test_attention_nets.py
similarity index 98%
rename from rllib/models/tests/test_torch_modules.py
rename to rllib/models/tests/test_attention_nets.py
index c55579642..2065f226e 100644
--- a/rllib/models/tests/test_torch_modules.py
+++ b/rllib/models/tests/test_attention_nets.py
@@ -13,7 +13,7 @@ from ray.rllib.utils.framework import try_import_torch, try_import_tf
 from ray.rllib.utils.test_utils import framework_iterator
 
 torch, nn = try_import_torch()
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class TestModules(unittest.TestCase):
@@ -144,7 +144,7 @@ class TestModules(unittest.TestCase):
                 model = TorchMultiHeadAttention(
                     in_dim=D_in, out_dim=D_out, num_heads=2, head_dim=32)
 
-                self.train_torch_layer(model, x, y)
+                self.train_torch_layer(model, x, y, num_epochs=500)
 
             else:  # framework is tensorflow or tensorflow-eager
 
@@ -165,7 +165,7 @@ class TestModules(unittest.TestCase):
             that it trains in a supervised setting."""
 
         # Checks that torch and tf embedding matrices are the same
-        with tf.Session().as_default() as sess:
+        with tf1.Session().as_default() as sess:
             assert np.allclose(
                 relative_position_embedding(20, 15).eval(session=sess),
                 relative_position_embedding_torch(20, 15).numpy())
diff --git a/rllib/models/tests/test_distributions.py b/rllib/models/tests/test_distributions.py
index 4531dd00f..3a4bebd13 100644
--- a/rllib/models/tests/test_distributions.py
+++ b/rllib/models/tests/test_distributions.py
@@ -16,7 +16,7 @@ from ray.rllib.utils.numpy import MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT, \
     softmax, SMALL_NUMBER, LARGE_INTEGER
 from ray.rllib.utils.test_utils import check, framework_iterator
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 tree = try_import_tree()
 
@@ -75,13 +75,13 @@ class TestDistributions(unittest.TestCase):
     def test_categorical(self):
         """Tests the Categorical ActionDistribution (tf only)."""
         num_samples = 100000
-        logits = tf.placeholder(tf.float32, shape=(None, 10))
+        logits = tf1.placeholder(tf.float32, shape=(None, 10))
         z = 8 * (np.random.rand(10) - 0.5)
         data = np.tile(z, (num_samples, 1))
         c = Categorical(logits, {})  # dummy config dict
         sample_op = c.sample()
-        sess = tf.Session()
-        sess.run(tf.global_variables_initializer())
+        sess = tf1.Session()
+        sess.run(tf1.global_variables_initializer())
         samples = sess.run(sample_op, feed_dict={logits: data})
         counts = np.zeros(10)
         for sample in samples:
diff --git a/rllib/models/tf/attention_net.py b/rllib/models/tf/attention_net.py
index 513e99dda..c96cf6c48 100644
--- a/rllib/models/tf/attention_net.py
+++ b/rllib/models/tf/attention_net.py
@@ -17,7 +17,7 @@ from ray.rllib.models.tf.recurrent_net import RecurrentNetwork
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 # TODO(sven): Use RLlib's FCNet instead.
diff --git a/rllib/models/tf/fcnet.py b/rllib/models/tf/fcnet.py
index 2b13eea18..b016b5a97 100644
--- a/rllib/models/tf/fcnet.py
+++ b/rllib/models/tf/fcnet.py
@@ -4,7 +4,7 @@ from ray.rllib.models.tf.misc import normc_initializer
 from ray.rllib.models.tf.tf_modelv2 import TFModelV2
 from ray.rllib.utils.framework import get_activation_fn, try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class FullyConnectedNetwork(TFModelV2):
diff --git a/rllib/models/tf/fcnet_v1.py b/rllib/models/tf/fcnet_v1.py
index 54746111f..cc8ed3d59 100644
--- a/rllib/models/tf/fcnet_v1.py
+++ b/rllib/models/tf/fcnet_v1.py
@@ -4,7 +4,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.deprecation import deprecation_warning
 from ray.rllib.utils.framework import get_activation_fn, try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 # Deprecated: see as an alternative models/tf.fcnet.py
@@ -29,15 +29,15 @@ class FullyConnectedNetwork(Model):
         activation = get_activation_fn(options.get("fcnet_activation"))
 
         if len(inputs.shape) > 2:
-            inputs = tf.layers.flatten(inputs)
+            inputs = tf1.layers.flatten(inputs)
 
-        with tf.name_scope("fc_net"):
+        with tf1.name_scope("fc_net"):
             i = 1
             last_layer = inputs
             for size in hiddens:
                 # skip final linear layer
                 if options.get("no_final_linear") and i == len(hiddens):
-                    output = tf.layers.dense(
+                    output = tf1.layers.dense(
                         last_layer,
                         num_outputs,
                         kernel_initializer=normc_initializer(1.0),
@@ -46,7 +46,7 @@ class FullyConnectedNetwork(Model):
                     return output, output
 
                 label = "fc{}".format(i)
-                last_layer = tf.layers.dense(
+                last_layer = tf1.layers.dense(
                     last_layer,
                     size,
                     kernel_initializer=normc_initializer(1.0),
@@ -54,7 +54,7 @@ class FullyConnectedNetwork(Model):
                     name=label)
                 i += 1
 
-            output = tf.layers.dense(
+            output = tf1.layers.dense(
                 last_layer,
                 num_outputs,
                 kernel_initializer=normc_initializer(0.01),
diff --git a/rllib/models/tf/layers/gru_gate.py b/rllib/models/tf/layers/gru_gate.py
index f738626a8..69dba748c 100644
--- a/rllib/models/tf/layers/gru_gate.py
+++ b/rllib/models/tf/layers/gru_gate.py
@@ -1,6 +1,6 @@
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class GRUGate(tf.keras.layers.Layer):
diff --git a/rllib/models/tf/layers/multi_head_attention.py b/rllib/models/tf/layers/multi_head_attention.py
index ccc461364..04583adaa 100644
--- a/rllib/models/tf/layers/multi_head_attention.py
+++ b/rllib/models/tf/layers/multi_head_attention.py
@@ -5,7 +5,7 @@
 """
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class MultiHeadAttention(tf.keras.layers.Layer):
diff --git a/rllib/models/tf/layers/noisy_layer.py b/rllib/models/tf/layers/noisy_layer.py
index 7024c8acd..a204bd222 100644
--- a/rllib/models/tf/layers/noisy_layer.py
+++ b/rllib/models/tf/layers/noisy_layer.py
@@ -3,7 +3,7 @@ import numpy as np
 from ray.rllib.utils.framework import get_activation_fn, get_variable, \
     try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class NoisyLayer(tf.keras.layers.Layer):
diff --git a/rllib/models/tf/layers/relative_multi_head_attention.py b/rllib/models/tf/layers/relative_multi_head_attention.py
index eb9d2f9c9..affd48cee 100644
--- a/rllib/models/tf/layers/relative_multi_head_attention.py
+++ b/rllib/models/tf/layers/relative_multi_head_attention.py
@@ -1,6 +1,6 @@
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class RelativeMultiHeadAttention(tf.keras.layers.Layer):
diff --git a/rllib/models/tf/layers/skip_connection.py b/rllib/models/tf/layers/skip_connection.py
index f56c7b9ac..f2f0e1d5f 100644
--- a/rllib/models/tf/layers/skip_connection.py
+++ b/rllib/models/tf/layers/skip_connection.py
@@ -1,6 +1,6 @@
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class SkipConnection(tf.keras.layers.Layer):
diff --git a/rllib/models/tf/lstm_v1.py b/rllib/models/tf/lstm_v1.py
index c1889340f..e7e4f4a20 100644
--- a/rllib/models/tf/lstm_v1.py
+++ b/rllib/models/tf/lstm_v1.py
@@ -7,7 +7,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.deprecation import deprecation_warning
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 # Deprecated: see as an alternative models/tf/recurrent_net.py
@@ -45,7 +45,7 @@ class LSTM(Model):
         last_layer = add_time_dimension(features, self.seq_lens)
 
         # Setup the LSTM cell
-        lstm = tf.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True)
+        lstm = tf1.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True)
         self.state_init = [
             np.zeros(lstm.state_size.c, np.float32),
             np.zeros(lstm.state_size.h, np.float32)
@@ -55,15 +55,15 @@ class LSTM(Model):
         if self.state_in:
             c_in, h_in = self.state_in
         else:
-            c_in = tf.placeholder(
+            c_in = tf1.placeholder(
                 tf.float32, [None, lstm.state_size.c], name="c")
-            h_in = tf.placeholder(
+            h_in = tf1.placeholder(
                 tf.float32, [None, lstm.state_size.h], name="h")
             self.state_in = [c_in, h_in]
 
         # Setup LSTM outputs
-        state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
-        lstm_out, lstm_state = tf.nn.dynamic_rnn(
+        state_in = tf1.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
+        lstm_out, lstm_state = tf1.nn.dynamic_rnn(
             lstm,
             last_layer,
             initial_state=state_in,
diff --git a/rllib/models/tf/misc.py b/rllib/models/tf/misc.py
index 64034407a..1da1bbb86 100644
--- a/rllib/models/tf/misc.py
+++ b/rllib/models/tf/misc.py
@@ -1,7 +1,7 @@
 import numpy as np
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 def normc_initializer(std=1.0):
@@ -24,7 +24,7 @@ def conv2d(x,
     if dtype is None:
         dtype = tf.float32
 
-    with tf.variable_scope(name):
+    with tf1.variable_scope(name):
         stride_shape = [1, stride[0], stride[1], 1]
         filter_shape = [
             filter_size[0], filter_size[1],
@@ -40,24 +40,24 @@ def conv2d(x,
         # Initialize weights with random weights.
         w_bound = np.sqrt(6 / (fan_in + fan_out))
 
-        w = tf.get_variable(
+        w = tf1.get_variable(
             "W",
             filter_shape,
             dtype,
-            tf.random_uniform_initializer(-w_bound, w_bound),
+            tf1.random_uniform_initializer(-w_bound, w_bound),
             collections=collections)
-        b = tf.get_variable(
+        b = tf1.get_variable(
             "b", [1, 1, 1, num_filters],
-            initializer=tf.constant_initializer(0.0),
+            initializer=tf1.constant_initializer(0.0),
             collections=collections)
-        return tf.nn.conv2d(x, w, stride_shape, pad) + b
+        return tf1.nn.conv2d(x, w, stride_shape, pad) + b
 
 
 def linear(x, size, name, initializer=None, bias_init=0):
-    w = tf.get_variable(
+    w = tf1.get_variable(
         name + "/w", [x.get_shape()[1], size], initializer=initializer)
-    b = tf.get_variable(
-        name + "/b", [size], initializer=tf.constant_initializer(bias_init))
+    b = tf1.get_variable(
+        name + "/b", [size], initializer=tf1.constant_initializer(bias_init))
     return tf.matmul(x, w) + b
 
 
diff --git a/rllib/models/tf/modelv1_compat.py b/rllib/models/tf/modelv1_compat.py
index fb90c2bbf..a44deba19 100644
--- a/rllib/models/tf/modelv1_compat.py
+++ b/rllib/models/tf/modelv1_compat.py
@@ -9,7 +9,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.tf_ops import scope_vars
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -47,7 +47,7 @@ def make_v1_wrapper(legacy_model_cls):
             # Tracks update ops
             self._update_ops = None
 
-            with tf.variable_scope(self.name) as scope:
+            with tf1.variable_scope(self.name) as scope:
                 self.variable_scope = scope
 
         @override(ModelV2)
@@ -58,20 +58,20 @@ def make_v1_wrapper(legacy_model_cls):
         def __call__(self, input_dict, state, seq_lens):
             if self.cur_instance:
                 # create a weight-sharing model copy
-                with tf.variable_scope(self.cur_instance.scope, reuse=True):
+                with tf1.variable_scope(self.cur_instance.scope, reuse=True):
                     new_instance = self.legacy_model_cls(
                         input_dict, self.obs_space, self.action_space,
                         self.num_outputs, self.model_config, state, seq_lens)
             else:
                 # create a new model instance
-                with tf.variable_scope(self.name):
+                with tf1.variable_scope(self.name):
                     prev_update_ops = set(
-                        tf.get_collection(tf.GraphKeys.UPDATE_OPS))
+                        tf1.get_collection(tf1.GraphKeys.UPDATE_OPS))
                     new_instance = self.legacy_model_cls(
                         input_dict, self.obs_space, self.action_space,
                         self.num_outputs, self.model_config, state, seq_lens)
                     self._update_ops = list(
-                        set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) -
+                        set(tf1.get_collection(tf1.GraphKeys.UPDATE_OPS)) -
                         prev_update_ops)
             if len(new_instance.state_init) != len(self.get_initial_state()):
                 raise ValueError(
@@ -112,8 +112,9 @@ def make_v1_wrapper(legacy_model_cls):
         def value_function(self):
             assert self.cur_instance is not None, "must call forward first"
 
-            with tf.variable_scope(self.variable_scope):
-                with tf.variable_scope("value_function", reuse=tf.AUTO_REUSE):
+            with tf1.variable_scope(self.variable_scope):
+                with tf1.variable_scope(
+                        "value_function", reuse=tf1.AUTO_REUSE):
                     # Simple case: sharing the feature layer
                     if self.model_config["vf_share_layers"]:
                         return tf.reshape(
diff --git a/rllib/models/tf/recurrent_net.py b/rllib/models/tf/recurrent_net.py
index d31389d37..355213800 100644
--- a/rllib/models/tf/recurrent_net.py
+++ b/rllib/models/tf/recurrent_net.py
@@ -7,7 +7,7 @@ from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.utils.annotations import override, DeveloperAPI
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 @DeveloperAPI
@@ -160,18 +160,17 @@ class LSTMWrapper(RecurrentNetwork):
 
         # Concat. prev-action/reward if required.
         if self.model_config["lstm_use_prev_action_reward"]:
-            if self.model_config["lstm_use_prev_action_reward"]:
-                wrapped_out = tf.concat(
-                    [
-                        wrapped_out,
-                        tf.reshape(
-                            tf.cast(input_dict[SampleBatch.PREV_ACTIONS],
-                                    tf.float32), [-1, self.action_dim]),
-                        tf.reshape(
-                            tf.cast(input_dict[SampleBatch.PREV_REWARDS],
-                                    tf.float32), [-1, 1]),
-                    ],
-                    axis=1)
+            wrapped_out = tf.concat(
+                [
+                    wrapped_out,
+                    tf.reshape(
+                        tf.cast(input_dict[SampleBatch.PREV_ACTIONS],
+                                tf.float32), [-1, self.action_dim]),
+                    tf.reshape(
+                        tf.cast(input_dict[SampleBatch.PREV_REWARDS],
+                                tf.float32), [-1, 1]),
+                ],
+                axis=1)
 
         # Then through our LSTM.
         input_dict["obs_flat"] = wrapped_out
diff --git a/rllib/models/tf/tf_action_dist.py b/rllib/models/tf/tf_action_dist.py
index 031c5ff5c..a6e14257a 100644
--- a/rllib/models/tf/tf_action_dist.py
+++ b/rllib/models/tf/tf_action_dist.py
@@ -9,7 +9,7 @@ from ray.rllib.utils.annotations import override, DeveloperAPI
 from ray.rllib.utils.framework import try_import_tf, try_import_tfp
 from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 tfp = try_import_tfp()
 tree = try_import_tree()
 
@@ -85,7 +85,7 @@ class Categorical(TFActionDistribution):
 
     @override(TFActionDistribution)
     def _build_sample_op(self):
-        return tf.squeeze(tf.multinomial(self.inputs, 1), axis=1)
+        return tf.squeeze(tf.random.categorical(self.inputs, 1), axis=1)
 
     @staticmethod
     @override(ActionDistribution)
diff --git a/rllib/models/tf/tf_modelv2.py b/rllib/models/tf/tf_modelv2.py
index f8b5859ee..94565286f 100644
--- a/rllib/models/tf/tf_modelv2.py
+++ b/rllib/models/tf/tf_modelv2.py
@@ -2,7 +2,7 @@ from ray.rllib.models.modelv2 import ModelV2
 from ray.rllib.utils.annotations import override, PublicAPI
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 @PublicAPI
@@ -39,10 +39,10 @@ class TFModelV2(ModelV2):
             name,
             framework="tf")
         self.var_list = []
-        if tf.executing_eagerly():
+        if tf1.executing_eagerly():
             self.graph = None
         else:
-            self.graph = tf.get_default_graph()
+            self.graph = tf1.get_default_graph()
 
     def context(self):
         """Returns a contextmanager for the current TF graph."""
diff --git a/rllib/models/tf/visionnet.py b/rllib/models/tf/visionnet.py
index e4fd75a40..97f8bcf5d 100644
--- a/rllib/models/tf/visionnet.py
+++ b/rllib/models/tf/visionnet.py
@@ -3,7 +3,7 @@ from ray.rllib.models.tf.visionnet_v1 import _get_filter_config
 from ray.rllib.models.tf.misc import normc_initializer
 from ray.rllib.utils.framework import get_activation_fn, try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class VisionNetwork(TFModelV2):
diff --git a/rllib/models/tf/visionnet_v1.py b/rllib/models/tf/visionnet_v1.py
index 539e84e9c..417149402 100644
--- a/rllib/models/tf/visionnet_v1.py
+++ b/rllib/models/tf/visionnet_v1.py
@@ -4,7 +4,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.deprecation import deprecation_warning
 from ray.rllib.utils.framework import get_activation_fn, try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 # Deprecated: see as an alternative models/tf.visionnet.py
@@ -24,9 +24,9 @@ class VisionNetwork(Model):
 
         activation = get_activation_fn(options.get("conv_activation"))
 
-        with tf.name_scope("vision_net"):
+        with tf1.name_scope("vision_net"):
             for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
-                inputs = tf.layers.conv2d(
+                inputs = tf1.layers.conv2d(
                     inputs,
                     out_size,
                     kernel,
@@ -38,7 +38,7 @@ class VisionNetwork(Model):
 
             # skip final linear layer
             if options.get("no_final_linear"):
-                fc_out = tf.layers.conv2d(
+                fc_out = tf1.layers.conv2d(
                     inputs,
                     num_outputs,
                     kernel,
@@ -48,7 +48,7 @@ class VisionNetwork(Model):
                     name="fc_out")
                 return flatten(fc_out), flatten(fc_out)
 
-            fc1 = tf.layers.conv2d(
+            fc1 = tf1.layers.conv2d(
                 inputs,
                 out_size,
                 kernel,
@@ -56,7 +56,7 @@ class VisionNetwork(Model):
                 activation=activation,
                 padding="valid",
                 name="fc1")
-            fc2 = tf.layers.conv2d(
+            fc2 = tf1.layers.conv2d(
                 fc1,
                 num_outputs, [1, 1],
                 activation=None,
diff --git a/rllib/offline/input_reader.py b/rllib/offline/input_reader.py
index 9fe5f4309..c0eeb11da 100644
--- a/rllib/offline/input_reader.py
+++ b/rllib/offline/input_reader.py
@@ -6,7 +6,7 @@ from ray.rllib.policy.sample_batch import MultiAgentBatch
 from ray.rllib.utils.annotations import PublicAPI
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -75,7 +75,7 @@ class InputReader:
             k: (-1, ) + s[1:]
             for (k, s) in [(k, batch[k].shape) for k in keys]
         }
-        queue = tf.FIFOQueue(capacity=queue_size, dtypes=dtypes, names=keys)
+        queue = tf1.FIFOQueue(capacity=queue_size, dtypes=dtypes, names=keys)
         tensors = queue.dequeue()
 
         logger.info("Creating TF queue runner for {}".format(self))
@@ -92,12 +92,12 @@ class _QueueRunner(threading.Thread):
 
     def __init__(self, input_reader, queue, keys, dtypes):
         threading.Thread.__init__(self)
-        self.sess = tf.get_default_session()
+        self.sess = tf1.get_default_session()
         self.daemon = True
         self.input_reader = input_reader
         self.keys = keys
         self.queue = queue
-        self.placeholders = [tf.placeholder(dtype) for dtype in dtypes]
+        self.placeholders = [tf1.placeholder(dtype) for dtype in dtypes]
         self.enqueue_op = queue.enqueue(dict(zip(keys, self.placeholders)))
 
     def enqueue(self, batch):
diff --git a/rllib/offline/json_reader.py b/rllib/offline/json_reader.py
index b72d139cc..372349cfd 100644
--- a/rllib/offline/json_reader.py
+++ b/rllib/offline/json_reader.py
@@ -45,7 +45,7 @@ class JsonReader(InputReader):
                 logger.warning(
                     "Treating input directory as glob pattern: {}".format(
                         inputs))
-            if urlparse(inputs).scheme not in ["d", ""]:
+            if len(urlparse(inputs).scheme) > 1:  # 1 letter = Windows drive
                 raise ValueError(
                     "Don't know how to glob over `{}`, ".format(inputs) +
                     "please specify a list of files to read instead.")
@@ -123,7 +123,7 @@ class JsonReader(InputReader):
 
     def _next_file(self):
         path = random.choice(self.files)
-        if urlparse(path).scheme:
+        if len(urlparse(path).scheme) > 1:  # 1 letter = Windows drive
             if smart_open is None:
                 raise ValueError(
                     "You must install the `smart_open` module to read "
diff --git a/rllib/offline/json_writer.py b/rllib/offline/json_writer.py
index 47df20329..f9700eb44 100644
--- a/rllib/offline/json_writer.py
+++ b/rllib/offline/json_writer.py
@@ -42,7 +42,7 @@ class JsonWriter(OutputWriter):
         self.ioctx = ioctx or IOContext()
         self.max_file_size = max_file_size
         self.compress_columns = compress_columns
-        if urlparse(path).scheme:
+        if len(urlparse(path).scheme) > 1:  # 1 letter = Windows drive
             self.path_is_uri = True
         else:
             path = os.path.abspath(os.path.expanduser(path))
diff --git a/rllib/optimizers/aso_multi_gpu_learner.py b/rllib/optimizers/aso_multi_gpu_learner.py
index 1935e78c5..346babf63 100644
--- a/rllib/optimizers/aso_multi_gpu_learner.py
+++ b/rllib/optimizers/aso_multi_gpu_learner.py
@@ -15,7 +15,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.timer import TimerStat
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -86,14 +86,15 @@ class TFMultiGPULearner(LearnerThread):
         self.par_opt = []
         with self.local_worker.tf_sess.graph.as_default():
             with self.local_worker.tf_sess.as_default():
-                with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
+                with tf1.variable_scope(
+                        DEFAULT_POLICY_ID, reuse=tf1.AUTO_REUSE):
                     if self.policy._state_inputs:
                         rnn_inputs = self.policy._state_inputs + [
                             self.policy._seq_lens
                         ]
                     else:
                         rnn_inputs = []
-                    adam = tf.train.AdamOptimizer(self.lr)
+                    adam = tf1.train.AdamOptimizer(self.lr)
                     for _ in range(num_data_loader_buffers):
                         self.par_opt.append(
                             LocalSyncParallelOptimizer(
@@ -105,7 +106,7 @@ class TFMultiGPULearner(LearnerThread):
                                 self.policy.copy))
 
                 self.sess = self.local_worker.tf_sess
-                self.sess.run(tf.global_variables_initializer())
+                self.sess.run(tf1.global_variables_initializer())
 
         self.idle_optimizers = queue.Queue()
         self.ready_optimizers = queue.Queue()
diff --git a/rllib/optimizers/multi_gpu_impl.py b/rllib/optimizers/multi_gpu_impl.py
index 0771bb18b..ac6d475e9 100644
--- a/rllib/optimizers/multi_gpu_impl.py
+++ b/rllib/optimizers/multi_gpu_impl.py
@@ -5,7 +5,7 @@ from ray.util.debug import log_once
 from ray.rllib.utils.debug import summarize
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 # Variable scope in which created variables will be placed under
 TOWER_SCOPE_NAME = "tower"
@@ -63,21 +63,21 @@ class LocalSyncParallelOptimizer:
         self.build_graph = build_graph
 
         # First initialize the shared loss network
-        with tf.name_scope(TOWER_SCOPE_NAME):
+        with tf1.name_scope(TOWER_SCOPE_NAME):
             self._shared_loss = build_graph(self.loss_inputs)
-        shared_ops = tf.get_collection(
-            tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
+        shared_ops = tf1.get_collection(
+            tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name)
 
         # Then setup the per-device loss graphs that use the shared weights
-        self._batch_index = tf.placeholder(tf.int32, name="batch_index")
+        self._batch_index = tf1.placeholder(tf.int32, name="batch_index")
 
         # Dynamic batch size, which may be shrunk if there isn't enough data
-        self._per_device_batch_size = tf.placeholder(
+        self._per_device_batch_size = tf1.placeholder(
             tf.int32, name="per_device_batch_size")
         self._loaded_per_device_batch_size = max_per_device_batch_size
 
         # When loading RNN input, we dynamically determine the max seq len
-        self._max_seq_len = tf.placeholder(tf.int32, name="max_seq_len")
+        self._max_seq_len = tf1.placeholder(tf.int32, name="max_seq_len")
         self._loaded_max_seq_len = 1
 
         # Split on the CPU in case the data doesn't fit in GPU memory.
@@ -103,15 +103,15 @@ class LocalSyncParallelOptimizer:
         # gather update ops for any batch norm layers. TODO(ekl) here we will
         # use all the ops found which won't work for DQN / DDPG, but those
         # aren't supported with multi-gpu right now anyways.
-        self._update_ops = tf.get_collection(
-            tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
+        self._update_ops = tf1.get_collection(
+            tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name)
         for op in shared_ops:
             self._update_ops.remove(op)  # only care about tower update ops
         if self._update_ops:
             logger.debug("Update ops to run on apply gradient: {}".format(
                 self._update_ops))
 
-        with tf.control_dependencies(self._update_ops):
+        with tf1.control_dependencies(self._update_ops):
             self._train_op = self.optimizer.apply_gradients(avg)
 
     def load_data(self, sess, inputs, state_inputs):
@@ -265,11 +265,11 @@ class LocalSyncParallelOptimizer:
     def _setup_device(self, device, device_input_placeholders, num_data_in):
         assert num_data_in <= len(device_input_placeholders)
         with tf.device(device):
-            with tf.name_scope(TOWER_SCOPE_NAME):
+            with tf1.name_scope(TOWER_SCOPE_NAME):
                 device_input_batches = []
                 device_input_slices = []
                 for i, ph in enumerate(device_input_placeholders):
-                    current_batch = tf.Variable(
+                    current_batch = tf1.Variable(
                         ph,
                         trainable=False,
                         validate_shape=False,
diff --git a/rllib/optimizers/multi_gpu_optimizer.py b/rllib/optimizers/multi_gpu_optimizer.py
index 20883ff83..8ff0c2370 100644
--- a/rllib/optimizers/multi_gpu_optimizer.py
+++ b/rllib/optimizers/multi_gpu_optimizer.py
@@ -16,7 +16,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.sgd import averaged
 from ray.rllib.utils.timer import TimerStat
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -115,7 +115,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
         with self.workers.local_worker().tf_sess.graph.as_default():
             with self.workers.local_worker().tf_sess.as_default():
                 for policy_id, policy in self.policies.items():
-                    with tf.variable_scope(policy_id, reuse=tf.AUTO_REUSE):
+                    with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE):
                         if policy._state_inputs:
                             rnn_inputs = policy._state_inputs + [
                                 policy._seq_lens
@@ -130,7 +130,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
                                 self.per_device_batch_size, policy.copy))
 
                 self.sess = self.workers.local_worker().tf_sess
-                self.sess.run(tf.global_variables_initializer())
+                self.sess.run(tf1.global_variables_initializer())
 
     @override(PolicyOptimizer)
     def step(self):
diff --git a/rllib/optimizers/tests/test_optimizers.py b/rllib/optimizers/tests/test_optimizers.py
index 35ff838de..0c145757c 100644
--- a/rllib/optimizers/tests/test_optimizers.py
+++ b/rllib/optimizers/tests/test_optimizers.py
@@ -14,7 +14,7 @@ from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.tests.mock_worker import _MockWorker
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class LRScheduleTest(unittest.TestCase):
@@ -250,7 +250,7 @@ class AsyncSamplesOptimizerTest(unittest.TestCase):
 
     def _make_envs(self):
         def make_sess():
-            return tf.Session(config=tf.ConfigProto(device_count={"CPU": 2}))
+            return tf1.Session(config=tf1.ConfigProto(device_count={"CPU": 2}))
 
         local = RolloutWorker(
             env_creator=lambda _: gym.make("CartPole-v0"),
diff --git a/rllib/policy/dynamic_tf_policy.py b/rllib/policy/dynamic_tf_policy.py
index d8502e91b..68fde7339 100644
--- a/rllib/policy/dynamic_tf_policy.py
+++ b/rllib/policy/dynamic_tf_policy.py
@@ -14,7 +14,7 @@ from ray.rllib.utils.debug import summarize
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.tracking_dict import UsageTrackingDict
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 logger = logging.getLogger(__name__)
 
@@ -116,7 +116,7 @@ class DynamicTFPolicy(TFPolicy):
             explore = existing_inputs["is_exploring"]
             timestep = existing_inputs["timestep"]
         else:
-            obs = tf.placeholder(
+            obs = tf1.placeholder(
                 tf.float32,
                 shape=[None] + list(obs_space.shape),
                 name="observation")
@@ -124,11 +124,11 @@ class DynamicTFPolicy(TFPolicy):
             if self._obs_include_prev_action_reward:
                 prev_actions = ModelCatalog.get_action_placeholder(
                     action_space, "prev_action")
-                prev_rewards = tf.placeholder(
+                prev_rewards = tf1.placeholder(
                     tf.float32, [None], name="prev_reward")
-            explore = tf.placeholder_with_default(
+            explore = tf1.placeholder_with_default(
                 True, (), name="is_exploring")
-            timestep = tf.placeholder(tf.int32, (), name="timestep")
+            timestep = tf1.placeholder(tf.int32, (), name="timestep")
 
         self._input_dict = {
             SampleBatch.CUR_OBS: obs,
@@ -137,7 +137,7 @@ class DynamicTFPolicy(TFPolicy):
             "is_training": self._get_is_training_placeholder(),
         }
         # Placeholder for RNN time-chunk valid lengths.
-        self._seq_lens = tf.placeholder(
+        self._seq_lens = tf1.placeholder(
             dtype=tf.int32, shape=[None], name="seq_lens")
 
         dist_class = dist_inputs = None
@@ -176,7 +176,7 @@ class DynamicTFPolicy(TFPolicy):
                 self._seq_lens = existing_inputs["seq_lens"]
         else:
             self._state_in = [
-                tf.placeholder(shape=(None, ) + s.shape, dtype=s.dtype)
+                tf1.placeholder(shape=(None, ) + s.shape, dtype=s.dtype)
                 for s in self.model.get_initial_state()
             ]
 
@@ -223,7 +223,7 @@ class DynamicTFPolicy(TFPolicy):
                     explore=explore)
 
         # Phase 1 init.
-        sess = tf.get_default_session() or tf.Session()
+        sess = tf1.get_default_session() or tf1.Session()
         if get_batch_divisibility_req:
             batch_divisibility_req = get_batch_divisibility_req(self)
         else:
@@ -343,7 +343,7 @@ class DynamicTFPolicy(TFPolicy):
             dummy_batch[k] = fake_array(v)
 
         # postprocessing might depend on variable init, so run it first here
-        self._sess.run(tf.global_variables_initializer())
+        self._sess.run(tf1.global_variables_initializer())
 
         postprocessed_batch = self.postprocess_trajectory(
             SampleBatch(dummy_batch))
@@ -380,7 +380,7 @@ class DynamicTFPolicy(TFPolicy):
                 continue
             shape = (None, ) + v.shape[1:]
             dtype = np.float32 if v.dtype == np.float64 else v.dtype
-            placeholder = tf.placeholder(dtype, shape=shape, name=k)
+            placeholder = tf1.placeholder(dtype, shape=shape, name=k)
             train_batch[k] = placeholder
 
         for i, si in enumerate(self._state_in):
@@ -402,7 +402,7 @@ class DynamicTFPolicy(TFPolicy):
         if self._grad_stats_fn:
             self._stats_fetches.update(
                 self._grad_stats_fn(self, train_batch, self._grads))
-        self._sess.run(tf.global_variables_initializer())
+        self._sess.run(tf1.global_variables_initializer())
 
     def _do_loss_init(self, train_batch):
         loss = self._loss_fn(self, self.model, self.dist_class, train_batch)
diff --git a/rllib/policy/eager_tf_policy.py b/rllib/policy/eager_tf_policy.py
index 767f84750..9d0f3377b 100644
--- a/rllib/policy/eager_tf_policy.py
+++ b/rllib/policy/eager_tf_policy.py
@@ -16,7 +16,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.spaces.space_utils import flatten_to_single_ndarray
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 logger = logging.getLogger(__name__)
 
 
@@ -239,7 +239,7 @@ def build_eager_tf_policy(name,
                 )
             self.exploration = self._create_exploration()
             self._state_in = [
-                tf.convert_to_tensor(np.array([s]))
+                tf.convert_to_tensor([s])
                 for s in self.model.get_initial_state()
             ]
             input_dict = {
@@ -266,7 +266,7 @@ def build_eager_tf_policy(name,
             if optimizer_fn:
                 self._optimizer = optimizer_fn(self, config)
             else:
-                self._optimizer = tf.train.AdamOptimizer(config["lr"])
+                self._optimizer = tf1.train.AdamOptimizer(config["lr"])
 
             if after_init:
                 after_init(self, observation_space, action_space, config)
@@ -618,8 +618,7 @@ def build_eager_tf_policy(name,
                 SampleBatch.DONES: np.array([False], dtype=np.bool),
                 SampleBatch.REWARDS: np.array([0], dtype=np.float32),
             }
-            if isinstance(self.action_space, Tuple) or isinstance(
-                    self.action_space, Dict):
+            if isinstance(self.action_space, (Dict, Tuple)):
                 dummy_batch[SampleBatch.ACTIONS] = [
                     flatten_to_single_ndarray(self.action_space.sample())
                 ]
@@ -640,7 +639,7 @@ def build_eager_tf_policy(name,
                 dummy_batch["seq_lens"] = np.array([1], dtype=np.int32)
 
             # Convert everything to tensors.
-            dummy_batch = tf.nest.map_structure(tf.convert_to_tensor,
+            dummy_batch = tf.nest.map_structure(tf.convert_to_tensor,
                                                 dummy_batch)
 
             # for IMPALA which expects a certain sample batch size.
diff --git a/rllib/policy/rnn_sequencing.py b/rllib/policy/rnn_sequencing.py
index d38ec9158..5946938dc 100644
--- a/rllib/policy/rnn_sequencing.py
+++ b/rllib/policy/rnn_sequencing.py
@@ -20,7 +20,7 @@ from ray.rllib.utils.annotations import DeveloperAPI
 from ray.rllib.utils.debug import summarize
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 logger = logging.getLogger(__name__)
@@ -203,7 +203,7 @@ def chop_into_sequences(episode_ids,
     seq_len = 0
     unique_ids = np.add(
         np.add(episode_ids, agent_indices),
-        np.array(unroll_ids) << 32)
+        np.array(unroll_ids, dtype=np.int64) << 32)
     for uid in unique_ids:
         if (prev_id is not None and uid != prev_id) or \
                 seq_len >= max_seq_len:
diff --git a/rllib/policy/tests/test_compute_log_likelihoods.py b/rllib/policy/tests/test_compute_log_likelihoods.py
index 16554205c..10fa7d705 100644
--- a/rllib/policy/tests/test_compute_log_likelihoods.py
+++ b/rllib/policy/tests/test_compute_log_likelihoods.py
@@ -11,7 +11,7 @@ from ray.rllib.utils.test_utils import check, framework_iterator
 from ray.rllib.utils.numpy import one_hot, fc, MIN_LOG_NN_OUTPUT, \
     MAX_LOG_NN_OUTPUT
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 def do_test_log_likelihood(run,
diff --git a/rllib/policy/tf_policy.py b/rllib/policy/tf_policy.py
index d3b00a2d8..6b4243190 100644
--- a/rllib/policy/tf_policy.py
+++ b/rllib/policy/tf_policy.py
@@ -16,7 +16,7 @@ from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.schedules import ConstantSchedule, PiecewiseSchedule
 from ray.rllib.utils.tf_run_builder import TFRunBuilder
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 logger = logging.getLogger(__name__)
 
 
@@ -131,9 +131,9 @@ class TFPolicy(Policy):
         self._sampled_action = sampled_action
         self._is_training = self._get_is_training_placeholder()
         self._is_exploring = explore if explore is not None else \
-            tf.placeholder_with_default(True, (), name="is_exploring")
+            tf1.placeholder_with_default(True, (), name="is_exploring")
         self._sampled_action_logp = sampled_action_logp
-        self._sampled_action_prob = (tf.exp(self._sampled_action_logp)
+        self._sampled_action_prob = (tf.math.exp(self._sampled_action_logp)
                                      if self._sampled_action_logp is not None
                                      else None)
         self._action_input = action_input  # For logp calculations.
@@ -162,7 +162,7 @@ class TFPolicy(Policy):
         self._apply_op = None
         self._stats_fetches = {}
         self._timestep = timestep if timestep is not None else \
-            tf.placeholder(tf.int32, (), name="timestep")
+            tf1.placeholder(tf.int32, (), name="timestep")
 
         self._optimizer = None
         self._grads_and_vars = None
@@ -248,12 +248,12 @@ class TFPolicy(Policy):
 
         # gather update ops for any batch norm layers
         if not self._update_ops:
-            self._update_ops = tf.get_collection(
-                tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
+            self._update_ops = tf1.get_collection(
+                tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name)
         if self._update_ops:
             logger.info("Update ops to run on apply gradient: {}".format(
                 self._update_ops))
-        with tf.control_dependencies(self._update_ops):
+        with tf1.control_dependencies(self._update_ops):
             self._apply_op = self.build_apply_op(self._optimizer,
                                                  self._grads_and_vars)
 
@@ -262,7 +262,7 @@ class TFPolicy(Policy):
                 "These tensors were used in the loss_fn:\n\n{}\n".format(
                     summarize(self._loss_input_dict)))
 
-        self._sess.run(tf.global_variables_initializer())
+        self._sess.run(tf1.global_variables_initializer())
         self._optimizer_variables = None
         if self._optimizer:
             self._optimizer_variables = \
@@ -397,12 +397,12 @@ class TFPolicy(Policy):
     def export_model(self, export_dir):
         """Export tensorflow graph to export_dir for serving."""
         with self._sess.graph.as_default():
-            builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
+            builder = tf1.saved_model.builder.SavedModelBuilder(export_dir)
             signature_def_map = self._build_signature_def()
             builder.add_meta_graph_and_variables(
-                self._sess, [tf.saved_model.tag_constants.SERVING],
+                self._sess, [tf1.saved_model.tag_constants.SERVING],
                 signature_def_map=signature_def_map,
-                saver=tf.summary.FileWriter(export_dir).add_graph(
+                saver=tf1.summary.FileWriter(export_dir).add_graph(
                     graph=self._sess.graph))
             builder.save()
 
@@ -417,7 +417,7 @@ class TFPolicy(Policy):
                 raise
         save_path = os.path.join(export_dir, filename_prefix)
         with self._sess.graph.as_default():
-            saver = tf.train.Saver()
+            saver = tf1.train.Saver()
             saver.save(self._sess, save_path)
 
     @override(Policy)
@@ -479,9 +479,9 @@ class TFPolicy(Policy):
     def optimizer(self):
         """TF optimizer to use for policy optimization."""
         if hasattr(self, "config"):
-            return tf.train.AdamOptimizer(learning_rate=self.config["lr"])
+            return tf1.train.AdamOptimizer(learning_rate=self.config["lr"])
         else:
-            return tf.train.AdamOptimizer()
+            return tf1.train.AdamOptimizer()
 
     @DeveloperAPI
     def gradients(self, optimizer, loss):
@@ -495,7 +495,7 @@ class TFPolicy(Policy):
         # specify global_step for TD3 which needs to count the num updates
         return optimizer.apply_gradients(
             self._grads_and_vars,
-            global_step=tf.train.get_or_create_global_step())
+            global_step=tf1.train.get_or_create_global_step())
 
     @DeveloperAPI
     def _get_is_training_placeholder(self):
@@ -504,7 +504,7 @@ class TFPolicy(Policy):
         This can be called safely before __init__ has run.
         """
         if not hasattr(self, "_is_training"):
-            self._is_training = tf.placeholder_with_default(
+            self._is_training = tf1.placeholder_with_default(
                 False, (), name="is_training")
         return self._is_training
 
@@ -519,7 +519,7 @@ class TFPolicy(Policy):
         """
         feed_dict = self.extra_compute_action_feed_dict()
         return {
-            k.name: tf.saved_model.utils.build_tensor_info(k)
+            k.name: tf1.saved_model.utils.build_tensor_info(k)
             for k in feed_dict.keys()
         }
 
@@ -529,7 +529,7 @@ class TFPolicy(Policy):
         """
         fetches = self.extra_compute_action_fetches()
         return {
-            k: tf.saved_model.utils.build_tensor_info(fetches[k])
+            k: tf1.saved_model.utils.build_tensor_info(fetches[k])
             for k in fetches.keys()
         }
 
@@ -539,38 +539,40 @@ class TFPolicy(Policy):
         # build input signatures
         input_signature = self._extra_input_signature_def()
         input_signature["observations"] = \
-            tf.saved_model.utils.build_tensor_info(self._obs_input)
+            tf1.saved_model.utils.build_tensor_info(self._obs_input)
 
         if self._seq_lens is not None:
             input_signature["seq_lens"] = \
-                tf.saved_model.utils.build_tensor_info(self._seq_lens)
+                tf1.saved_model.utils.build_tensor_info(self._seq_lens)
         if self._prev_action_input is not None:
             input_signature["prev_action"] = \
-                tf.saved_model.utils.build_tensor_info(self._prev_action_input)
+                tf1.saved_model.utils.build_tensor_info(
+                    self._prev_action_input)
         if self._prev_reward_input is not None:
             input_signature["prev_reward"] = \
-                tf.saved_model.utils.build_tensor_info(self._prev_reward_input)
+                tf1.saved_model.utils.build_tensor_info(
+                    self._prev_reward_input)
         input_signature["is_training"] = \
-            tf.saved_model.utils.build_tensor_info(self._is_training)
+            tf1.saved_model.utils.build_tensor_info(self._is_training)
 
         for state_input in self._state_inputs:
             input_signature[state_input.name] = \
-                tf.saved_model.utils.build_tensor_info(state_input)
+                tf1.saved_model.utils.build_tensor_info(state_input)
 
         # build output signatures
         output_signature = self._extra_output_signature_def()
         for i, a in enumerate(tf.nest.flatten(self._sampled_action)):
             output_signature["actions_{}".format(i)] = \
-                tf.saved_model.utils.build_tensor_info(a)
+                tf1.saved_model.utils.build_tensor_info(a)
 
         for state_output in self._state_outputs:
             output_signature[state_output.name] = \
-                tf.saved_model.utils.build_tensor_info(state_output)
+                tf1.saved_model.utils.build_tensor_info(state_output)
         signature_def = (
-            tf.saved_model.signature_def_utils.build_signature_def(
+            tf1.saved_model.signature_def_utils.build_signature_def(
                 input_signature, output_signature,
-                tf.saved_model.signature_constants.PREDICT_METHOD_NAME))
-        signature_def_key = (tf.saved_model.signature_constants.
+                tf1.saved_model.signature_constants.PREDICT_METHOD_NAME))
+        signature_def_key = (tf1.saved_model.signature_constants.
                              DEFAULT_SERVING_SIGNATURE_DEF_KEY)
         signature_def_map = {signature_def_key: signature_def}
         return signature_def_map
@@ -708,7 +710,7 @@ class LearningRateSchedule:
 
     @DeveloperAPI
     def __init__(self, lr, lr_schedule):
-        self.cur_lr = tf.get_variable("lr", initializer=lr, trainable=False)
+        self.cur_lr = tf1.get_variable("lr", initializer=lr, trainable=False)
         if lr_schedule is None:
             self.lr_schedule = ConstantSchedule(lr, framework=None)
         else:
@@ -724,7 +726,7 @@ class LearningRateSchedule:
 
     @override(TFPolicy)
     def optimizer(self):
-        return tf.train.AdamOptimizer(learning_rate=self.cur_lr)
+        return tf1.train.AdamOptimizer(learning_rate=self.cur_lr)
 
 
 @DeveloperAPI
@@ -733,7 +735,7 @@ class EntropyCoeffSchedule:
 
     @DeveloperAPI
     def __init__(self, entropy_coeff, entropy_coeff_schedule):
-        self.entropy_coeff = tf.get_variable(
+        self.entropy_coeff = tf1.get_variable(
             "entropy_coeff", initializer=entropy_coeff, trainable=False)
 
         if entropy_coeff_schedule is None:
diff --git a/rllib/policy/tf_policy_template.py b/rllib/policy/tf_policy_template.py
index 5c1f51f03..c355e6f4d 100644
--- a/rllib/policy/tf_policy_template.py
+++ b/rllib/policy/tf_policy_template.py
@@ -4,9 +4,6 @@ from ray.rllib.policy.policy import Policy, LEARNER_STATS_KEY
 from ray.rllib.policy.tf_policy import TFPolicy
 from ray.rllib.utils import add_mixins
 from ray.rllib.utils.annotations import override, DeveloperAPI
-from ray.rllib.utils.framework import try_import_tf
-
-tf = try_import_tf()
 
 
 @DeveloperAPI
diff --git a/rllib/tests/test_catalog.py b/rllib/tests/test_catalog.py
index 9c2c1f3da..32bfec194 100644
--- a/rllib/tests/test_catalog.py
+++ b/rllib/tests/test_catalog.py
@@ -14,7 +14,7 @@ from ray.rllib.models.tf.visionnet import VisionNetwork
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class CustomPreprocessor(Preprocessor):
@@ -54,7 +54,7 @@ class CustomActionDistribution(TFActionDistribution):
 
     @override(TFActionDistribution)
     def _build_sample_op(self):
-        return tf.random_uniform(self.output_shape)
+        return tf.random.uniform(self.output_shape)
 
     @override(ActionDistribution)
     def logp(self, x):
@@ -102,15 +102,15 @@ class ModelCatalogTest(unittest.TestCase):
     def test_default_models(self):
         ray.init(object_store_memory=1000 * 1024 * 1024)
 
-        with tf.variable_scope("test1"):
+        with tf1.variable_scope("test1"):
             p1 = ModelCatalog.get_model_v2(
-                obs_space=Box(0, 1, shape=(3, ), dtype=np.float32),
+                obs_space=Box(0, 1, shape=(3,), dtype=np.float32),
                 action_space=Discrete(5),
                 num_outputs=5,
                 model_config={})
             self.assertEqual(type(p1), FullyConnectedNetwork)
 
-        with tf.variable_scope("test2"):
+        with tf1.variable_scope("test2"):
             p2 = ModelCatalog.get_model_v2(
                 obs_space=Box(0, 1, shape=(84, 84, 3), dtype=np.float32),
                 action_space=Discrete(5),
@@ -149,7 +149,7 @@ class ModelCatalogTest(unittest.TestCase):
         self.assertEqual(param_shape, action_space.shape)
 
         # test the class works as a distribution
-        dist_input = tf.placeholder(tf.float32, (None, ) + param_shape)
+        dist_input = tf1.placeholder(tf.float32, (None,) + param_shape)
         model = Model()
         model.model_config = model_config
         dist = dist_cls(dist_input, model=model)
@@ -163,7 +163,7 @@ class ModelCatalogTest(unittest.TestCase):
         dist_cls, param_shape = ModelCatalog.get_action_dist(
             action_space, model_config)
         self.assertEqual(param_shape, (3, ))
-        dist_input = tf.placeholder(tf.float32, (None, ) + param_shape)
+        dist_input = tf1.placeholder(tf.float32, (None,) + param_shape)
         model.model_config = model_config
         dist = dist_cls(dist_input, model=model)
         self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
diff --git a/rllib/tests/test_model_imports.py b/rllib/tests/test_model_imports.py
index cc800c9f3..cf9aa8519 100644
--- a/rllib/tests/test_model_imports.py
+++ b/rllib/tests/test_model_imports.py
@@ -14,7 +14,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.utils.test_utils import check, framework_iterator
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, nn = try_import_torch()
 
 
diff --git a/rllib/tests/test_nested_observation_spaces.py b/rllib/tests/test_nested_observation_spaces.py
index dabc85be1..c9e1607d4 100644
--- a/rllib/tests/test_nested_observation_spaces.py
+++ b/rllib/tests/test_nested_observation_spaces.py
@@ -22,7 +22,7 @@ from ray.tune.registry import register_env
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.utils.spaces.repeated import Repeated
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 _, nn = try_import_torch()
 
 DICT_SPACE = spaces.Dict({
@@ -241,9 +241,9 @@ class DictSpyModel(TFModelV2):
                 pickle.dumps((pos, front_cam, task)),
                 overwrite=True)
             DictSpyModel.capture_index += 1
-            return 0
+            return np.array(0, dtype=np.int64)
 
-        spy_fn = tf.py_func(
+        spy_fn = tf1.py_func(
             spy, [
                 input_dict["obs"]["sensors"]["position"],
                 input_dict["obs"]["sensors"]["front_cam"][0],
@@ -252,9 +252,9 @@ class DictSpyModel(TFModelV2):
             tf.int64,
             stateful=True)
 
-        with tf.control_dependencies([spy_fn]):
-            output = tf.layers.dense(input_dict["obs"]["sensors"]["position"],
-                                     self.num_outputs)
+        with tf1.control_dependencies([spy_fn]):
+            output = tf1.layers.dense(input_dict["obs"]["sensors"]["position"],
+                                      self.num_outputs)
         return output, []
 
 
@@ -270,9 +270,9 @@ class TupleSpyModel(TFModelV2):
                 pickle.dumps((pos, cam, task)),
                 overwrite=True)
             TupleSpyModel.capture_index += 1
-            return 0
+            return np.array(0, dtype=np.int64)
 
-        spy_fn = tf.py_func(
+        spy_fn = tf1.py_func(
             spy, [
                 input_dict["obs"][0],
                 input_dict["obs"][1][0],
@@ -281,8 +281,8 @@ class TupleSpyModel(TFModelV2):
             tf.int64,
             stateful=True)
 
-        with tf.control_dependencies([spy_fn]):
-            output = tf.layers.dense(input_dict["obs"][0], self.num_outputs)
+        with tf1.control_dependencies([spy_fn]):
+            output = tf1.layers.dense(input_dict["obs"][0], self.num_outputs)
         return output, []
 
 
diff --git a/rllib/train.py b/rllib/train.py
index c7885f5a0..691ed8fd2 100755
--- a/rllib/train.py
+++ b/rllib/train.py
@@ -14,7 +14,7 @@ from ray.tune.tune import _make_scheduler, run_experiments
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
 # Try to import both backends for flag checking/warnings.
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 EXAMPLE_USAGE = """
diff --git a/rllib/utils/exploration/epsilon_greedy.py b/rllib/utils/exploration/epsilon_greedy.py
index 26abd766a..75b17215e 100644
--- a/rllib/utils/exploration/epsilon_greedy.py
+++ b/rllib/utils/exploration/epsilon_greedy.py
@@ -9,7 +9,7 @@ from ray.rllib.utils.from_config import from_config
 from ray.rllib.utils.numpy import LARGE_INTEGER
 from ray.rllib.utils.schedules import Schedule, PiecewiseSchedule
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 
@@ -97,12 +97,11 @@ class EpsilonGreedy(Exploration):
             tf.equal(q_values, tf.float32.min),
             tf.ones_like(q_values) * tf.float32.min, tf.ones_like(q_values))
         random_actions = tf.squeeze(
-            tf.multinomial(random_valid_action_logits, 1), axis=1)
+            tf.random.categorical(random_valid_action_logits, 1), axis=1)
 
-        chose_random = tf.random_uniform(
+        chose_random = tf.random.uniform(
             tf.stack([batch_size]),
-            minval=0, maxval=1, dtype=tf.float32) \
-            < epsilon
+            minval=0, maxval=1, dtype=tf.float32) < epsilon
 
         action = tf.cond(
             pred=tf.constant(explore, dtype=tf.bool)
@@ -112,8 +111,8 @@ class EpsilonGreedy(Exploration):
             ),
             false_fn=lambda: exploit_action)
 
-        assign_op = tf.assign(self.last_timestep, timestep)
-        with tf.control_dependencies([assign_op]):
+        assign_op = tf1.assign(self.last_timestep, timestep)
+        with tf1.control_dependencies([assign_op]):
             return action, tf.zeros_like(action, dtype=tf.float32)
 
     def _get_torch_exploration_action(self, q_values, explore, timestep):
diff --git a/rllib/utils/exploration/gaussian_noise.py b/rllib/utils/exploration/gaussian_noise.py
index 16554c927..34ebba45d 100644
--- a/rllib/utils/exploration/gaussian_noise.py
+++ b/rllib/utils/exploration/gaussian_noise.py
@@ -9,7 +9,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch, \
     get_variable, TensorType
 from ray.rllib.utils.schedules.piecewise_schedule import PiecewiseSchedule
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 
@@ -96,7 +96,7 @@ class GaussianNoise(Exploration):
         deterministic_actions = action_dist.deterministic_sample()
 
         # Take a Gaussian sample with our stddev (mean=0.0) and scale it.
-        gaussian_sample = self.scale_schedule(ts) * tf.random_normal(
+        gaussian_sample = self.scale_schedule(ts) * tf.random.normal(
             tf.shape(deterministic_actions), stddev=self.stddev)
 
         # Stochastic actions could either be: random OR action + noise.
@@ -120,13 +120,13 @@ class GaussianNoise(Exploration):
             true_fn=lambda: stochastic_actions,
             false_fn=lambda: deterministic_actions)
         # Logp=always zero.
-        logp = tf.zeros(shape=(batch_size, ), dtype=tf.float32)
+        logp = tf.zeros(shape=(batch_size,), dtype=tf.float32)
 
         # Increment `last_timestep` by 1 (or set to `timestep`).
-        assign_op = \
-            tf.assign_add(self.last_timestep, 1) if timestep is None else \
-            tf.assign(self.last_timestep, timestep)
-        with tf.control_dependencies([assign_op]):
+        assign_op = (
+            tf1.assign_add(self.last_timestep, 1) if timestep is None else
+            tf1.assign(self.last_timestep, timestep))
+        with tf1.control_dependencies([assign_op]):
             return action, logp
 
     def _get_torch_exploration_action(self, action_dist, explore, timestep):
diff --git a/rllib/utils/exploration/ornstein_uhlenbeck_noise.py b/rllib/utils/exploration/ornstein_uhlenbeck_noise.py
index 72ace558c..7b0f98ea8 100644
--- a/rllib/utils/exploration/ornstein_uhlenbeck_noise.py
+++ b/rllib/utils/exploration/ornstein_uhlenbeck_noise.py
@@ -5,7 +5,7 @@ from ray.rllib.utils.exploration.gaussian_noise import GaussianNoise
 from ray.rllib.utils.framework import try_import_tf, try_import_torch, \
     get_variable
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 
@@ -91,11 +91,11 @@ class OrnsteinUhlenbeckNoise(GaussianNoise):
 
         # Apply base-scaled and time-annealed scaled OU-noise to
         # deterministic actions.
-        gaussian_sample = tf.random_normal(
+        gaussian_sample = tf.random.normal(
             shape=[self.action_space.low.size], stddev=self.stddev)
         ou_new = self.ou_theta * -self.ou_state + \
             self.ou_sigma * gaussian_sample
-        ou_state_new = tf.assign_add(self.ou_state, ou_new)
+        ou_state_new = tf1.assign_add(self.ou_state, ou_new)
         high_m_low = self.action_space.high - self.action_space.low
         high_m_low = tf.where(
             tf.math.is_inf(high_m_low), tf.ones_like(high_m_low), high_m_low)
@@ -122,13 +122,13 @@ class OrnsteinUhlenbeckNoise(GaussianNoise):
             false_fn=lambda: deterministic_actions)
         # Logp=always zero.
         batch_size = tf.shape(deterministic_actions)[0]
-        logp = tf.zeros(shape=(batch_size, ), dtype=tf.float32)
+        logp = tf.zeros(shape=(batch_size,), dtype=tf.float32)
 
         # Increment `last_timestep` by 1 (or set to `timestep`).
-        assign_op = \
-            tf.assign_add(self.last_timestep, 1) if timestep is None else \
-            tf.assign(self.last_timestep, timestep)
-        with tf.control_dependencies([assign_op, ou_state_new]):
+        assign_op = (
+            tf1.assign_add(self.last_timestep, 1) if timestep is None else
+            tf1.assign(self.last_timestep, timestep))
+        with tf1.control_dependencies([assign_op, ou_state_new]):
             return action, logp
 
     @override(GaussianNoise)
diff --git a/rllib/utils/exploration/parameter_noise.py b/rllib/utils/exploration/parameter_noise.py
index 6654bd829..abf59f188 100644
--- a/rllib/utils/exploration/parameter_noise.py
+++ b/rllib/utils/exploration/parameter_noise.py
@@ -13,7 +13,7 @@ from ray.rllib.utils.framework import get_variable
 from ray.rllib.utils.from_config import from_config
 from ray.rllib.utils.numpy import softmax, SMALL_NUMBER
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 
@@ -91,9 +91,9 @@ class ParameterNoise(Exploration):
             self.tf_remove_noise_op = \
                 self._tf_remove_noise_op()
             # Create convenience sample+add op for tf.
-            with tf.control_dependencies([self.tf_sample_new_noise_op]):
+            with tf1.control_dependencies([self.tf_sample_new_noise_op]):
                 add_op = self._tf_add_stored_noise_op()
-            with tf.control_dependencies([add_op]):
+            with tf1.control_dependencies([add_op]):
                 self.tf_sample_new_noise_and_add_op = tf.no_op()
 
         # Whether the Model's weights currently have noise added or not.
@@ -303,7 +303,7 @@ class ParameterNoise(Exploration):
         added_noises = []
         for noise in self.noise:
             added_noises.append(
-                tf.assign(
+                tf1.assign(
                     noise,
                     tf.random.normal(
                         shape=noise.shape,
@@ -361,9 +361,9 @@ class ParameterNoise(Exploration):
         """
         add_noise_ops = list()
         for var, noise in zip(self.model_variables, self.noise):
-            add_noise_ops.append(tf.assign_add(var, noise))
+            add_noise_ops.append(tf1.assign_add(var, noise))
         ret = tf.group(*tuple(add_noise_ops))
-        with tf.control_dependencies([ret]):
+        with tf1.control_dependencies([ret]):
             return tf.no_op()
 
     def _remove_noise(self, *, tf_sess=None):
@@ -400,9 +400,9 @@ class ParameterNoise(Exploration):
         """
         remove_noise_ops = list()
         for var, noise in zip(self.model_variables, self.noise):
-            remove_noise_ops.append(tf.assign_add(var, -noise))
+            remove_noise_ops.append(tf1.assign_add(var, -noise))
         ret = tf.group(*tuple(remove_noise_ops))
-        with tf.control_dependencies([ret]):
+        with tf1.control_dependencies([ret]):
             return tf.no_op()
 
     @override(Exploration)
diff --git a/rllib/utils/exploration/random.py b/rllib/utils/exploration/random.py
index 581883320..935848bbf 100644
--- a/rllib/utils/exploration/random.py
+++ b/rllib/utils/exploration/random.py
@@ -11,7 +11,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch, \
     TensorType
 from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 
diff --git a/rllib/utils/exploration/stochastic_sampling.py b/rllib/utils/exploration/stochastic_sampling.py
index a6431745b..f5cd7b003 100644
--- a/rllib/utils/exploration/stochastic_sampling.py
+++ b/rllib/utils/exploration/stochastic_sampling.py
@@ -8,7 +8,7 @@ from ray.rllib.utils.exploration.exploration import Exploration
 from ray.rllib.utils.framework import try_import_tf, try_import_torch, \
     TensorType
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 tree = try_import_tree()
 
diff --git a/rllib/utils/exploration/tests/test_explorations.py b/rllib/utils/exploration/tests/test_explorations.py
index 04bfe71ed..910cb5d5b 100644
--- a/rllib/utils/exploration/tests/test_explorations.py
+++ b/rllib/utils/exploration/tests/test_explorations.py
@@ -11,9 +11,7 @@ import ray.rllib.agents.impala as impala
 import ray.rllib.agents.pg as pg
 import ray.rllib.agents.ppo as ppo
 import ray.rllib.agents.sac as sac
-from ray.rllib.utils import check, framework_iterator, try_import_tf
-
-tf = try_import_tf()
+from ray.rllib.utils import check, framework_iterator
 
 
 def do_test_explorations(run,
diff --git a/rllib/utils/exploration/tests/test_parameter_noise.py b/rllib/utils/exploration/tests/test_parameter_noise.py
index a77d71bbe..b186bb70f 100644
--- a/rllib/utils/exploration/tests/test_parameter_noise.py
+++ b/rllib/utils/exploration/tests/test_parameter_noise.py
@@ -3,11 +3,8 @@ import unittest
 
 import ray.rllib.agents.ddpg as ddpg
 import ray.rllib.agents.dqn as dqn
-from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.test_utils import check, framework_iterator
 
-tf = try_import_tf()
-
 
 class TestParameterNoise(unittest.TestCase):
     def test_ddpg_parameter_noise(self):
diff --git a/rllib/utils/framework.py b/rllib/utils/framework.py
index 014d4d7d6..b57e4db77 100644
--- a/rllib/utils/framework.py
+++ b/rllib/utils/framework.py
@@ -21,7 +21,11 @@ def try_import_tf(error=False):
         error (bool): Whether to raise an error if tf cannot be imported.
 
     Returns:
-        The tf module (either from tf2.0.compat.v1 OR as tf1.x.
+        Tuple:
+            - tf1.x module (either from tf2.x.compat.v1 OR as tf1.x).
+            - tf module (resulting from `import tensorflow`).
+                Either tf1.x or 2.x.
+            - The actually installed tf version as int: 1 or 2.
 
     Raises:
         ImportError: If error=True and tf is not installed.
@@ -30,7 +34,7 @@ def try_import_tf(error=False):
     # that uses them: del os.environ["RLLIB_TEST_NO_TF_IMPORT"]
     if "RLLIB_TEST_NO_TF_IMPORT" in os.environ:
         logger.warning("Not importing TensorFlow for test purposes")
-        return None
+        return None, None, None
 
     if "TF_CPP_MIN_LOG_LEVEL" not in os.environ:
         os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@@ -38,34 +42,31 @@ def try_import_tf(error=False):
     # Try to reuse already imported tf module. This will avoid going through
     # the initial import steps below and thereby switching off v2_behavior
     # (switching off v2 behavior twice breaks all-framework tests for eager).
+    was_imported = False
     if "tensorflow" in sys.modules:
         tf_module = sys.modules["tensorflow"]
-        # Try "reducing" tf to tf.compat.v1.
-        try:
-            tf_module = tf_module.compat.v1
-        # No compat.v1 -> return tf as is.
-        except AttributeError:
-            pass
-        return tf_module
+        was_imported = True
 
-    # Just in case. We should not go through the below twice.
-    assert "tensorflow" not in sys.modules
-
-    try:
-        # Try "reducing" tf to tf.compat.v1.
-        import tensorflow.compat.v1 as tf
-        tf.logging.set_verbosity(tf.logging.ERROR)
-        # Disable v2 eager mode.
-        tf.disable_v2_behavior()
-        return tf
-    except ImportError:
+    else:
         try:
-            import tensorflow as tf
-            return tf
+            import tensorflow as tf_module
         except ImportError as e:
             if error:
                 raise e
-            return None
+            return None, None, None
+
+    # Try "reducing" tf to tf.compat.v1.
+    try:
+        tf1_module = tf_module.compat.v1
+        if not was_imported:
+            tf1_module.disable_v2_behavior()
+    # No compat.v1 -> return tf as is.
+    except AttributeError:
+        tf1_module = tf_module
+
+    version = 2 if "2." in tf_module.__version__[:2] else 1
+
+    return tf1_module, tf_module, version
 
 
 def tf_function(tf_module):
@@ -221,16 +222,10 @@ def get_activation_fn(name, framework="tf"):
     else:
         if name in ["linear", None]:
             return None
-        tf = try_import_tf()
+        tf1, tf, tfv = try_import_tf()
         fn = getattr(tf.nn, name, None)
         if fn is not None:
             return fn
 
     raise ValueError("Unknown activation ({}) for framework={}!".format(
         name, framework))
-
-
-# This call should never happen inside a module's functions/classes
-# as it would re-disable tf-eager.
-tf = try_import_tf()
-torch, _ = try_import_torch()
diff --git a/rllib/utils/numpy.py b/rllib/utils/numpy.py
index 650d711f4..0a6f95516 100644
--- a/rllib/utils/numpy.py
+++ b/rllib/utils/numpy.py
@@ -2,7 +2,7 @@ import numpy as np
 
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 SMALL_NUMBER = 1e-6
diff --git a/rllib/utils/schedules/piecewise_schedule.py b/rllib/utils/schedules/piecewise_schedule.py
index 6c82c30a0..b37fb1839 100644
--- a/rllib/utils/schedules/piecewise_schedule.py
+++ b/rllib/utils/schedules/piecewise_schedule.py
@@ -2,7 +2,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.schedules.schedule import Schedule
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 def _linear_interpolation(l, r, alpha):
diff --git a/rllib/utils/schedules/polynomial_schedule.py b/rllib/utils/schedules/polynomial_schedule.py
index f13767358..b6402da80 100644
--- a/rllib/utils/schedules/polynomial_schedule.py
+++ b/rllib/utils/schedules/polynomial_schedule.py
@@ -1,7 +1,7 @@
 from ray.rllib.utils.schedules.schedule import Schedule
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class PolynomialSchedule(Schedule):
diff --git a/rllib/utils/schedules/schedule.py b/rllib/utils/schedules/schedule.py
index 52a3205c5..316f359fe 100644
--- a/rllib/utils/schedules/schedule.py
+++ b/rllib/utils/schedules/schedule.py
@@ -3,7 +3,7 @@ from abc import ABCMeta, abstractmethod
 from ray.rllib.utils.annotations import DeveloperAPI
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 @DeveloperAPI
diff --git a/rllib/utils/schedules/tests/test_schedules.py b/rllib/utils/schedules/tests/test_schedules.py
index 9576833ce..0fed37092 100644
--- a/rllib/utils/schedules/tests/test_schedules.py
+++ b/rllib/utils/schedules/tests/test_schedules.py
@@ -5,7 +5,7 @@ from ray.rllib.utils.schedules import ConstantSchedule, \
 from ray.rllib.utils import check, framework_iterator, try_import_tf
 from ray.rllib.utils.from_config import from_config
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 class TestSchedules(unittest.TestCase):
diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py
index 2a9275028..444381c65 100644
--- a/rllib/utils/test_utils.py
+++ b/rllib/utils/test_utils.py
@@ -1,10 +1,11 @@
+import gym
 import logging
 import numpy as np
 
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 
-tf = try_import_tf()
-if tf:
+tf1, tf, tfv = try_import_tf()
+if tf1:
     eager_mode = None
     try:
         from tensorflow.python.eager.context import eager_mode
@@ -60,7 +61,7 @@ def framework_iterator(config=None,
         # Do we need a test session?
         sess = None
         if fw == "tf" and session is True:
-            sess = tf.Session()
+            sess = tf1.Session()
             sess.__enter__()
 
         print("framework={}".format(fw))
@@ -71,9 +72,9 @@ def framework_iterator(config=None,
         if fw == "tfe":
             eager_ctx = eager_mode()
             eager_ctx.__enter__()
-            assert tf.executing_eagerly()
+            assert tf1.executing_eagerly()
         elif fw == "tf":
-            assert not tf.executing_eagerly()
+            assert not tf1.executing_eagerly()
 
         yield fw if session is False else (fw, sess)
 
@@ -165,18 +166,18 @@ def check(x, y, decimals=5, atol=None, rtol=None, false=False):
                 raise e
     # Everything else (assume numeric or tf/torch.Tensor).
     else:
-        if tf is not None:
+        if tf1 is not None:
             # y should never be a Tensor (y=expected value).
-            if isinstance(y, tf.Tensor):
+            if isinstance(y, tf1.Tensor):
                 raise ValueError("`y` (expected value) must not be a Tensor. "
                                  "Use numpy.ndarray instead")
-            if isinstance(x, tf.Tensor):
+            if isinstance(x, tf1.Tensor):
                 # In eager mode, numpyize tensors.
-                if tf.executing_eagerly():
+                if tf1.executing_eagerly():
                     x = x.numpy()
                 # Otherwise, use a quick tf-session.
                 else:
-                    with tf.Session() as sess:
+                    with tf1.Session() as sess:
                         x = sess.run(x)
                         return check(
                             x,
@@ -264,12 +265,27 @@ def check_compute_single_action(trainer,
     except AttributeError:
         pol = trainer.policy
 
-    obs_space = pol.observation_space
     action_space = pol.action_space
 
     for what in [pol, trainer]:
-        method_to_test = trainer.compute_action if what is trainer else \
-            pol.compute_single_action
+        if what is trainer:
+            method_to_test = trainer.compute_action
+            # Get the obs-space from Workers.env (not Policy) due to possible
+            # pre-processor up front.
+            worker_set = getattr(
+                trainer, "workers", getattr(trainer, "_workers", None))
+            assert worker_set
+            if isinstance(worker_set, list):
+                obs_space = trainer.get_policy().observation_space
+                try:
+                    obs_space = obs_space.original_space
+                except AttributeError:
+                    pass
+            else:
+                obs_space = worker_set.local_worker().env.observation_space
+        else:
+            method_to_test = pol.compute_single_action
+            obs_space = pol.observation_space
 
         for explore in [True, False]:
             for full_fetch in ([False, True] if what is trainer else [False]):
@@ -279,7 +295,9 @@ def check_compute_single_action(trainer,
                 else:
                     call_kwargs["clip_actions"] = True
 
-                obs = np.clip(obs_space.sample(), -1.0, 1.0)
+                obs = obs_space.sample()
+                if isinstance(obs_space, gym.spaces.Box):
+                    obs = np.clip(obs, -1.0, 1.0)
                 state_in = None
                 if include_state:
                     state_in = pol.model.get_initial_state()
diff --git a/rllib/utils/tests/test_framework_agnostic_components.py b/rllib/utils/tests/test_framework_agnostic_components.py
index 400c16c40..5db64d7da 100644
--- a/rllib/utils/tests/test_framework_agnostic_components.py
+++ b/rllib/utils/tests/test_framework_agnostic_components.py
@@ -9,7 +9,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.utils.from_config import from_config
 from ray.rllib.utils.test_utils import check, framework_iterator
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 torch, _ = try_import_torch()
 
 
@@ -136,7 +136,7 @@ class TestFrameWorkAgnosticComponents(unittest.TestCase):
             # Test recognizing default package path.
             scope = None
             if sess:
-                scope = tf.variable_scope("exploration_object")
+                scope = tf1.variable_scope("exploration_object")
                 scope.__enter__()
             component = from_config(
                 Exploration, {
diff --git a/rllib/utils/tf_ops.py b/rllib/utils/tf_ops.py
index abdef2fc8..c6d55fa0f 100644
--- a/rllib/utils/tf_ops.py
+++ b/rllib/utils/tf_ops.py
@@ -1,6 +1,6 @@
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 
 
 def explained_variance(y, pred):
@@ -86,7 +86,7 @@ def make_tf_callable(session_or_none, dynamic_shape=False):
                             else:
                                 shape = v.shape
                             placeholders.append(
-                                tf.placeholder(
+                                tf1.placeholder(
                                     dtype=v.dtype,
                                     shape=shape,
                                     name="arg_{}".format(i)))
@@ -120,7 +120,7 @@ def scope_vars(scope, trainable_only=False):
     vars: [tf.Variable]
       list of variables in `scope`.
     """
-    return tf.get_collection(
-        tf.GraphKeys.TRAINABLE_VARIABLES
-        if trainable_only else tf.GraphKeys.VARIABLES,
+    return tf1.get_collection(
+        tf1.GraphKeys.TRAINABLE_VARIABLES
+        if trainable_only else tf1.GraphKeys.VARIABLES,
         scope=scope if isinstance(scope, str) else scope.name)
diff --git a/rllib/utils/tf_run_builder.py b/rllib/utils/tf_run_builder.py
index 4d891fbfa..82b904bd1 100644
--- a/rllib/utils/tf_run_builder.py
+++ b/rllib/utils/tf_run_builder.py
@@ -5,7 +5,7 @@ import time
 from ray.util.debug import log_once
 from ray.rllib.utils.framework import try_import_tf
 
-tf = try_import_tf()
+tf1, tf, tfv = try_import_tf()
 logger = logging.getLogger(__name__)
 
 
@@ -63,8 +63,8 @@ def run_timeline(sess, ops, debug_name, feed_dict={}, timeline_dir=None):
     if timeline_dir:
         from tensorflow.python.client import timeline
 
-        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
-        run_metadata = tf.RunMetadata()
+        run_options = tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE)
+        run_metadata = tf1.RunMetadata()
         start = time.time()
         fetches = sess.run(
             ops,
diff --git a/scripts b/scripts
deleted file mode 120000
index 8f67c5cc2..000000000
--- a/scripts
+++ /dev/null
@@ -1 +0,0 @@
-ci/travis
\ No newline at end of file