diff --git a/python/ray/experimental/tf_utils.py b/python/ray/experimental/tf_utils.py index c528d94d6..6677161a4 100644 --- a/python/ray/experimental/tf_utils.py +++ b/python/ray/experimental/tf_utils.py @@ -4,7 +4,7 @@ import numpy as np from ray.rllib.utils import force_list from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() def unflatten(vector, shapes): @@ -79,24 +79,29 @@ class TensorFlowVariables: variable_names.append(tf_obj.node_def.name) self.variables = OrderedDict() variable_list = [ - v for v in tf.global_variables() + v for v in tf1.global_variables() if v.op.node_def.name in variable_names ] if input_variables is not None: variable_list += input_variables - for v in variable_list: - self.variables[v.op.node_def.name] = v - self.placeholders = {} - self.assignment_nodes = {} + if not tf1.executing_eagerly(): + for v in variable_list: + self.variables[v.op.node_def.name] = v - # Create new placeholders to put in custom weights. - for k, var in self.variables.items(): - self.placeholders[k] = tf.placeholder( - var.value().dtype, - var.get_shape().as_list(), - name="Placeholder_" + k) - self.assignment_nodes[k] = var.assign(self.placeholders[k]) + self.placeholders = {} + self.assignment_nodes = {} + + # Create new placeholders to put in custom weights. + for k, var in self.variables.items(): + self.placeholders[k] = tf1.placeholder( + var.value().dtype, + var.get_shape().as_list(), + name="Placeholder_" + k) + self.assignment_nodes[k] = var.assign(self.placeholders[k]) + else: + for v in variable_list: + self.variables[v.name] = v def set_session(self, sess): """Sets the current session used by the class. @@ -117,10 +122,12 @@ class TensorFlowVariables: def _check_sess(self): """Checks if the session is set, and if not throw an error message.""" - assert self.sess is not None, ("The session is not set. Set the " - "session either by passing it into the " - "TensorFlowVariables constructor or by " - "calling set_session(sess).") + if tf1.executing_eagerly(): + return + assert self.sess is not None, \ + "The session is not set. Set the session either by passing it " \ + "into the TensorFlowVariables constructor or by calling " \ + "set_session(sess)." def get_flat(self): """Gets the weights and returns them as a flat array. @@ -129,6 +136,11 @@ class TensorFlowVariables: 1D Array containing the flattened weights. """ self._check_sess() + # Eager mode. + if not self.sess: + return np.concatenate( + [v.numpy().flatten() for v in self.variables.values()]) + # Graph mode. return np.concatenate([ v.eval(session=self.sess).flatten() for v in self.variables.values() @@ -147,12 +159,16 @@ class TensorFlowVariables: self._check_sess() shapes = [v.get_shape().as_list() for v in self.variables.values()] arrays = unflatten(new_weights, shapes) - placeholders = [ - self.placeholders[k] for k, v in self.variables.items() - ] - self.sess.run( - list(self.assignment_nodes.values()), - feed_dict=dict(zip(placeholders, arrays))) + if not self.sess: + for v, a in zip(self.variables.values(), arrays): + v.assign(a) + else: + placeholders = [ + self.placeholders[k] for k, v in self.variables.items() + ] + self.sess.run( + list(self.assignment_nodes.values()), + feed_dict=dict(zip(placeholders, arrays))) def get_weights(self): """Returns a dictionary containing the weights of the network. @@ -161,6 +177,10 @@ class TensorFlowVariables: Dictionary mapping variable names to their weights. """ self._check_sess() + # Eager mode. + if not self.sess: + return self.variables + # Graph mode. return self.sess.run(self.variables) def set_weights(self, new_weights): diff --git a/rllib/BUILD b/rllib/BUILD index 7610a1ac0..67db2c8a6 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -344,6 +344,7 @@ py_test( args = ["--yaml-dir=tuned_examples/sac", "--torch"] ) + # TD3 py_test( name = "run_regression_tests_pendulum_td3_tf", @@ -1013,6 +1014,13 @@ py_test( srcs = ["models/tests/test_distributions.py"] ) +py_test( + name = "test_attention_nets", + tags = ["models"], + size = "small", + srcs = ["models/tests/test_attention_nets.py"] +) + # -------------------------------------------------------------------- # Optimizers and Memories # rllib/execution/ diff --git a/rllib/agents/a3c/a3c_tf_policy.py b/rllib/agents/a3c/a3c_tf_policy.py index 8c2d9146a..dde894cd9 100644 --- a/rllib/agents/a3c/a3c_tf_policy.py +++ b/rllib/agents/a3c/a3c_tf_policy.py @@ -9,7 +9,7 @@ from ray.rllib.policy.tf_policy import LearningRateSchedule from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class A3CLoss: diff --git a/rllib/agents/ars/ars_tf_policy.py b/rllib/agents/ars/ars_tf_policy.py index e04118681..6c2f38022 100644 --- a/rllib/agents/ars/ars_tf_policy.py +++ b/rllib/agents/ars/ars_tf_policy.py @@ -13,7 +13,7 @@ from ray.rllib.utils.filter import get_filter from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.spaces.space_utils import unbatch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class ARSTFPolicy: @@ -29,8 +29,8 @@ class ARSTFPolicy: self.single_threaded = config.get("single_threaded", False) self.sess = make_session(single_threaded=self.single_threaded) - self.inputs = tf.placeholder(tf.float32, - [None] + list(self.preprocessor.shape)) + self.inputs = tf1.placeholder(tf.float32, + [None] + list(self.preprocessor.shape)) # Policy network. dist_class, dist_dim = ModelCatalog.get_action_dist( @@ -52,7 +52,7 @@ class ARSTFPolicy: self.num_params = sum( np.prod(variable.shape.as_list()) for _, variable in self.variables.variables.items()) - self.sess.run(tf.global_variables_initializer()) + self.sess.run(tf1.global_variables_initializer()) def compute_actions(self, observation, diff --git a/rllib/agents/ddpg/ddpg_tf_model.py b/rllib/agents/ddpg/ddpg_tf_model.py index dcaa17aab..84f8a0878 100644 --- a/rllib/agents/ddpg/ddpg_tf_model.py +++ b/rllib/agents/ddpg/ddpg_tf_model.py @@ -3,7 +3,7 @@ import numpy as np from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class DDPGTFModel(TFModelV2): diff --git a/rllib/agents/ddpg/ddpg_tf_policy.py b/rllib/agents/ddpg/ddpg_tf_policy.py index 4d9730254..027ccba26 100644 --- a/rllib/agents/ddpg/ddpg_tf_policy.py +++ b/rllib/agents/ddpg/ddpg_tf_policy.py @@ -22,7 +22,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.tf_ops import huber_loss, minimize_and_clip, \ make_tf_callable -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -126,18 +126,18 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch): target_model_out_tp1, _ = policy.target_model(input_dict_next, [], None) # Policy network evaluation. - with tf.variable_scope(POLICY_SCOPE, reuse=True): - # prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) + with tf1.variable_scope(POLICY_SCOPE, reuse=True): + # prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) policy_t = model.get_policy_output(model_out_t) # policy_batchnorm_update_ops = list( - # set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops) + # set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops) - with tf.variable_scope(POLICY_TARGET_SCOPE): + with tf1.variable_scope(POLICY_TARGET_SCOPE): policy_tp1 = \ policy.target_model.get_policy_output(target_model_out_tp1) # Action outputs. - with tf.variable_scope(ACTION_SCOPE, reuse=True): + with tf1.variable_scope(ACTION_SCOPE, reuse=True): if policy.config["smooth_target_policy"]: target_noise_clip = policy.config["target_noise_clip"] clipped_normal_sample = tf.clip_by_value( @@ -154,29 +154,29 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch): policy_tp1_smoothed = policy_tp1 # Q-net(s) evaluation. - # prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - with tf.variable_scope(Q_SCOPE): + # prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) + with tf1.variable_scope(Q_SCOPE): # Q-values for given actions & observations in given current q_t = model.get_q_values(model_out_t, train_batch[SampleBatch.ACTIONS]) - with tf.variable_scope(Q_SCOPE, reuse=True): + with tf1.variable_scope(Q_SCOPE, reuse=True): # Q-values for current policy (no noise) in given current state q_t_det_policy = model.get_q_values(model_out_t, policy_t) if twin_q: - with tf.variable_scope(TWIN_Q_SCOPE): + with tf1.variable_scope(TWIN_Q_SCOPE): twin_q_t = model.get_twin_q_values( model_out_t, train_batch[SampleBatch.ACTIONS]) # q_batchnorm_update_ops = list( - # set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops) + # set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops) # Target q-net(s) evaluation. - with tf.variable_scope(Q_TARGET_SCOPE): + with tf1.variable_scope(Q_TARGET_SCOPE): q_tp1 = policy.target_model.get_q_values(target_model_out_tp1, policy_tp1_smoothed) if twin_q: - with tf.variable_scope(TWIN_Q_TARGET_SCOPE): + with tf1.variable_scope(TWIN_Q_TARGET_SCOPE): twin_q_tp1 = policy.target_model.get_twin_q_values( target_model_out_tp1, policy_tp1_smoothed) @@ -220,10 +220,10 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch): if l2_reg is not None: for var in policy.model.policy_variables(): if "bias" not in var.name: - actor_loss += (l2_reg * tf.nn.l2_loss(var)) + actor_loss += (l2_reg * tf1.nn.l2_loss(var)) for var in policy.model.q_variables(): if "bias" not in var.name: - critic_loss += (l2_reg * tf.nn.l2_loss(var)) + critic_loss += (l2_reg * tf1.nn.l2_loss(var)) # Model self-supervised losses. if policy.config["use_state_preprocessor"]: @@ -259,9 +259,9 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch): def make_ddpg_optimizers(policy, config): # Create separate optimizers for actor & critic losses. - policy._actor_optimizer = tf.train.AdamOptimizer( + policy._actor_optimizer = tf1.train.AdamOptimizer( learning_rate=config["actor_lr"]) - policy._critic_optimizer = tf.train.AdamOptimizer( + policy._critic_optimizer = tf1.train.AdamOptimizer( learning_rate=config["critic_lr"]) return None @@ -286,7 +286,7 @@ def build_apply_op(policy, optimizer, grads_and_vars): # For policy gradient, update policy net one time v.s. # update critic net `policy_delay` time(s). should_apply_actor_opt = tf.equal( - tf.mod(policy.global_step, policy.config["policy_delay"]), 0) + tf.math.floormod(policy.global_step, policy.config["policy_delay"]), 0) def make_apply_op(): return policy._actor_optimizer.apply_gradients( @@ -299,7 +299,7 @@ def build_apply_op(policy, optimizer, grads_and_vars): critic_op = policy._critic_optimizer.apply_gradients( policy._critic_grads_and_vars) # Increment global step & apply ops. - with tf.control_dependencies([tf.assign_add(policy.global_step, 1)]): + with tf1.control_dependencies([tf1.assign_add(policy.global_step, 1)]): return tf.group(actor_op, critic_op) @@ -341,7 +341,7 @@ def build_ddpg_stats(policy, batch): def before_init_fn(policy, obs_space, action_space, config): # Create global step for counting the number of update operations. - policy.global_step = tf.train.get_or_create_global_step() + policy.global_step = tf1.train.get_or_create_global_step() class ComputeTDErrorMixin: diff --git a/rllib/agents/ddpg/ddpg_torch_policy.py b/rllib/agents/ddpg/ddpg_torch_policy.py index b3f49f102..b8ad9f801 100644 --- a/rllib/agents/ddpg/ddpg_torch_policy.py +++ b/rllib/agents/ddpg/ddpg_torch_policy.py @@ -49,10 +49,10 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch): target_model_out_tp1, _ = policy.target_model(input_dict_next, [], None) # Policy network evaluation. - # prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) + # prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) policy_t = model.get_policy_output(model_out_t) # policy_batchnorm_update_ops = list( - # set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops) + # set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops) policy_tp1 = \ policy.target_model.get_policy_output(target_model_out_tp1) @@ -73,7 +73,7 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch): policy_tp1_smoothed = policy_tp1 # Q-net(s) evaluation. - # prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) + # prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) # Q-values for given actions & observations in given current q_t = model.get_q_values(model_out_t, train_batch[SampleBatch.ACTIONS]) @@ -86,7 +86,7 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch): twin_q_t = model.get_twin_q_values(model_out_t, train_batch[SampleBatch.ACTIONS]) # q_batchnorm_update_ops = list( - # set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops) + # set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops) # Target q-net(s) evaluation. q_tp1 = policy.target_model.get_q_values(target_model_out_tp1, diff --git a/rllib/agents/ddpg/noop_model.py b/rllib/agents/ddpg/noop_model.py index 8da8af4ed..4dba83b9d 100644 --- a/rllib/agents/ddpg/noop_model.py +++ b/rllib/agents/ddpg/noop_model.py @@ -4,7 +4,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +_, tf, _ = try_import_tf() class NoopModel(TFModelV2): diff --git a/rllib/agents/ddpg/tests/test_td3.py b/rllib/agents/ddpg/tests/test_td3.py index 80dfe92d4..1c0356278 100644 --- a/rllib/agents/ddpg/tests/test_td3.py +++ b/rllib/agents/ddpg/tests/test_td3.py @@ -6,7 +6,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check, check_compute_single_action, \ framework_iterator -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class TestTD3(unittest.TestCase): @@ -32,8 +32,9 @@ class TestTD3(unittest.TestCase): # Test against all frameworks. for _ in framework_iterator(config, frameworks="tf"): + lcl_config = config.copy() # Default GaussianNoise setup. - trainer = td3.TD3Trainer(config=config, env="Pendulum-v0") + trainer = td3.TD3Trainer(config=lcl_config, env="Pendulum-v0") # Setting explore=False should always return the same action. a_ = trainer.compute_action(obs, explore=False) for _ in range(50): @@ -44,9 +45,10 @@ class TestTD3(unittest.TestCase): for _ in range(50): actions.append(trainer.compute_action(obs)) check(np.std(actions), 0.0, false=True) + trainer.stop() # Check randomness at beginning. - config["exploration_config"] = { + lcl_config["exploration_config"] = { # Act randomly at beginning ... "random_timesteps": 30, # Then act very closely to deterministic actions thereafter. @@ -54,7 +56,7 @@ class TestTD3(unittest.TestCase): "initial_scale": 0.001, "final_scale": 0.001, } - trainer = td3.TD3Trainer(config=config, env="Pendulum-v0") + trainer = td3.TD3Trainer(config=lcl_config, env="Pendulum-v0") # ts=1 (get a deterministic action as per explore=False). deterministic_action = trainer.compute_action(obs, explore=False) # ts=2-5 (in random window). @@ -73,6 +75,7 @@ class TestTD3(unittest.TestCase): for _ in range(50): a = trainer.compute_action(obs, explore=False) check(a, deterministic_action) + trainer.stop() if __name__ == "__main__": diff --git a/rllib/agents/dqn/distributional_q_tf_model.py b/rllib/agents/dqn/distributional_q_tf_model.py index c3e936f8e..cc30e2d9d 100644 --- a/rllib/agents/dqn/distributional_q_tf_model.py +++ b/rllib/agents/dqn/distributional_q_tf_model.py @@ -3,7 +3,7 @@ import numpy as np from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class DistributionalQTFModel(TFModelV2): @@ -155,7 +155,7 @@ class DistributionalQTFModel(TFModelV2): units=num_atoms, activation=None)(state_out) return state_score - if tf.executing_eagerly(): + if tf1.executing_eagerly(): from tensorflow.python.ops import variable_scope # Have to use a variable store to reuse variables in eager mode store = variable_scope.EagerVariableStore() @@ -163,30 +163,32 @@ class DistributionalQTFModel(TFModelV2): # Save the scope objects, since in eager we will execute this # path repeatedly and there is no guarantee it will always be run # in the same original scope. - with tf.variable_scope(name + "/action_value") as action_scope: + with tf1.variable_scope(name + "/action_value") as action_scope: pass - with tf.variable_scope(name + "/state_value") as state_scope: + with tf1.variable_scope(name + "/state_value") as state_scope: pass def build_action_value_in_scope(model_out): with store.as_default(): - with tf.variable_scope(action_scope, reuse=tf.AUTO_REUSE): + with tf1.variable_scope( + action_scope, reuse=tf1.AUTO_REUSE): return build_action_value(model_out) def build_state_score_in_scope(model_out): with store.as_default(): - with tf.variable_scope(state_scope, reuse=tf.AUTO_REUSE): + with tf1.variable_scope( + state_scope, reuse=tf1.AUTO_REUSE): return build_state_score(model_out) else: def build_action_value_in_scope(model_out): - with tf.variable_scope( - name + "/action_value", reuse=tf.AUTO_REUSE): + with tf1.variable_scope( + name + "/action_value", reuse=tf1.AUTO_REUSE): return build_action_value(model_out) def build_state_score_in_scope(model_out): - with tf.variable_scope( - name + "/state_value", reuse=tf.AUTO_REUSE): + with tf1.variable_scope( + name + "/state_value", reuse=tf1.AUTO_REUSE): return build_state_score(model_out) q_out = build_action_value_in_scope(self.model_out) @@ -241,33 +243,33 @@ class DistributionalQTFModel(TFModelV2): epsilon_w = tf.matmul( a=tf.expand_dims(epsilon_in, -1), b=tf.expand_dims(epsilon_out, 0)) epsilon_b = epsilon_out - sigma_w = tf.get_variable( + sigma_w = tf1.get_variable( name=prefix + "_sigma_w", shape=[in_size, out_size], dtype=tf.float32, - initializer=tf.random_uniform_initializer( + initializer=tf1.random_uniform_initializer( minval=-1.0 / np.sqrt(float(in_size)), maxval=1.0 / np.sqrt(float(in_size)))) # TF noise generation can be unreliable on GPU # If generating the noise on the CPU, # lowering sigma0 to 0.1 may be helpful - sigma_b = tf.get_variable( + sigma_b = tf1.get_variable( name=prefix + "_sigma_b", shape=[out_size], dtype=tf.float32, # 0.5~GPU, 0.1~CPU - initializer=tf.constant_initializer( + initializer=tf1.constant_initializer( sigma0 / np.sqrt(float(in_size)))) - w = tf.get_variable( + w = tf1.get_variable( name=prefix + "_fc_w", shape=[in_size, out_size], dtype=tf.float32, - initializer=tf.initializers.glorot_uniform()) - b = tf.get_variable( + initializer=tf.initializers.GlorotUniform()) + b = tf1.get_variable( name=prefix + "_fc_b", shape=[out_size], dtype=tf.float32, - initializer=tf.zeros_initializer()) + initializer=tf.initializers.Zeros()) action_activation = \ tf.keras.layers.Lambda(lambda x: tf.matmul( diff --git a/rllib/agents/dqn/dqn_tf_policy.py b/rllib/agents/dqn/dqn_tf_policy.py index c5e13bf5e..7faaf6da9 100644 --- a/rllib/agents/dqn/dqn_tf_policy.py +++ b/rllib/agents/dqn/dqn_tf_policy.py @@ -17,7 +17,7 @@ from ray.rllib.utils.tf_ops import huber_loss, reduce_mean_ignore_inf, \ minimize_and_clip from ray.rllib.utils.tf_ops import make_tf_callable -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() Q_SCOPE = "q_func" Q_TARGET_SCOPE = "target_q_func" @@ -253,7 +253,7 @@ def build_q_losses(policy, model, _, train_batch): def adam_optimizer(policy, config): - return tf.train.AdamOptimizer( + return tf1.train.AdamOptimizer( learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"]) diff --git a/rllib/agents/dqn/simple_q_model.py b/rllib/agents/dqn/simple_q_model.py index 432071775..54eee6000 100644 --- a/rllib/agents/dqn/simple_q_model.py +++ b/rllib/agents/dqn/simple_q_model.py @@ -1,7 +1,7 @@ from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class SimpleQModel(TFModelV2): diff --git a/rllib/agents/dqn/simple_q_tf_policy.py b/rllib/agents/dqn/simple_q_tf_policy.py index a9879c434..c6a70615b 100644 --- a/rllib/agents/dqn/simple_q_tf_policy.py +++ b/rllib/agents/dqn/simple_q_tf_policy.py @@ -15,7 +15,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.tf_ops import huber_loss, make_tf_callable -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) Q_SCOPE = "q_func" diff --git a/rllib/agents/dqn/tests/test_dqn.py b/rllib/agents/dqn/tests/test_dqn.py index 491103c3a..287b46c77 100644 --- a/rllib/agents/dqn/tests/test_dqn.py +++ b/rllib/agents/dqn/tests/test_dqn.py @@ -7,7 +7,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check, check_compute_single_action, \ framework_iterator -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class TestDQN(unittest.TestCase): diff --git a/rllib/agents/dqn/tests/test_simple_q.py b/rllib/agents/dqn/tests/test_simple_q.py index 9039cbcc6..057d6350c 100644 --- a/rllib/agents/dqn/tests/test_simple_q.py +++ b/rllib/agents/dqn/tests/test_simple_q.py @@ -11,7 +11,7 @@ from ray.rllib.utils.numpy import fc, one_hot, huber_loss from ray.rllib.utils.test_utils import check, check_compute_single_action, \ framework_iterator -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class TestSimpleQ(unittest.TestCase): diff --git a/rllib/agents/es/es_tf_policy.py b/rllib/agents/es/es_tf_policy.py index 73964179d..c739a906c 100644 --- a/rllib/agents/es/es_tf_policy.py +++ b/rllib/agents/es/es_tf_policy.py @@ -14,7 +14,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space, \ unbatch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() tree = try_import_tree() @@ -60,9 +60,9 @@ def rollout(policy, env, timestep_limit=None, add_noise=False, offset=0.0): def make_session(single_threaded): if not single_threaded: - return tf.Session() - return tf.Session( - config=tf.ConfigProto( + return tf1.Session() + return tf1.Session( + config=tf1.ConfigProto( inter_op_parallelism_threads=1, intra_op_parallelism_threads=1)) @@ -77,8 +77,8 @@ class ESTFPolicy: self.preprocessor.shape) self.single_threaded = config.get("single_threaded", False) self.sess = make_session(single_threaded=self.single_threaded) - self.inputs = tf.placeholder(tf.float32, - [None] + list(self.preprocessor.shape)) + self.inputs = tf1.placeholder(tf.float32, + [None] + list(self.preprocessor.shape)) # Policy network. dist_class, dist_dim = ModelCatalog.get_action_dist( @@ -98,7 +98,7 @@ class ESTFPolicy: self.num_params = sum( np.prod(variable.shape.as_list()) for _, variable in self.variables.variables.items()) - self.sess.run(tf.global_variables_initializer()) + self.sess.run(tf1.global_variables_initializer()) def compute_actions(self, observation, diff --git a/rllib/agents/impala/tests/test_impala.py b/rllib/agents/impala/tests/test_impala.py index 7089a2281..e9885ac66 100644 --- a/rllib/agents/impala/tests/test_impala.py +++ b/rllib/agents/impala/tests/test_impala.py @@ -6,7 +6,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check_compute_single_action, \ framework_iterator -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class TestIMPALA(unittest.TestCase): diff --git a/rllib/agents/impala/tests/test_vtrace.py b/rllib/agents/impala/tests/test_vtrace.py index b0277c036..53d0d42a5 100644 --- a/rllib/agents/impala/tests/test_vtrace.py +++ b/rllib/agents/impala/tests/test_vtrace.py @@ -30,7 +30,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.numpy import softmax from ray.rllib.utils.test_utils import check, framework_iterator -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() @@ -185,20 +185,20 @@ class VtraceTest(unittest.TestCase): # can deal with that. inputs_ = { # T, B, NUM_ACTIONS - "behaviour_policy_logits": tf.placeholder( + "behaviour_policy_logits": tf1.placeholder( dtype=tf.float32, shape=[None, None, None]), # T, B, NUM_ACTIONS - "target_policy_logits": tf.placeholder( + "target_policy_logits": tf1.placeholder( dtype=tf.float32, shape=[None, None, None]), - "actions": tf.placeholder( + "actions": tf1.placeholder( dtype=tf.int32, shape=[None, None]), - "discounts": tf.placeholder( + "discounts": tf1.placeholder( dtype=tf.float32, shape=[None, None]), - "rewards": tf.placeholder( + "rewards": tf1.placeholder( dtype=tf.float32, shape=[None, None]), - "values": tf.placeholder( + "values": tf1.placeholder( dtype=tf.float32, shape=[None, None]), - "bootstrap_value": tf.placeholder( + "bootstrap_value": tf1.placeholder( dtype=tf.float32, shape=[None]), } else: @@ -282,15 +282,15 @@ class VtraceTest(unittest.TestCase): vtrace = vtrace_tf if fw != "torch" else vtrace_torch if fw == "tf": inputs_ = { - "log_rhos": tf.placeholder( + "log_rhos": tf1.placeholder( dtype=tf.float32, shape=[None, None, 1]), - "discounts": tf.placeholder( + "discounts": tf1.placeholder( dtype=tf.float32, shape=[None, None, 1]), - "rewards": tf.placeholder( + "rewards": tf1.placeholder( dtype=tf.float32, shape=[None, None, 42]), - "values": tf.placeholder( + "values": tf1.placeholder( dtype=tf.float32, shape=[None, None, 42]), - "bootstrap_value": tf.placeholder( + "bootstrap_value": tf1.placeholder( dtype=tf.float32, shape=[None, 42]) } else: @@ -310,16 +310,16 @@ class VtraceTest(unittest.TestCase): vtrace = vtrace_tf if fw != "torch" else vtrace_torch if fw == "tf": inputs_ = { - "log_rhos": tf.placeholder( + "log_rhos": tf1.placeholder( dtype=tf.float32, shape=[None, None, 1]), - "discounts": tf.placeholder( + "discounts": tf1.placeholder( dtype=tf.float32, shape=[None, None, 1]), - "rewards": tf.placeholder( + "rewards": tf1.placeholder( dtype=tf.float32, shape=[None, None, 42]), - "values": tf.placeholder( + "values": tf1.placeholder( dtype=tf.float32, shape=[None, None, 42]), # Should be [None, 42]. - "bootstrap_value": tf.placeholder( + "bootstrap_value": tf1.placeholder( dtype=tf.float32, shape=[None]) } else: diff --git a/rllib/agents/impala/vtrace_tf.py b/rllib/agents/impala/vtrace_tf.py index aa6ab5c7a..fb612c57e 100644 --- a/rllib/agents/impala/vtrace_tf.py +++ b/rllib/agents/impala/vtrace_tf.py @@ -33,7 +33,7 @@ import collections from ray.rllib.models.tf.tf_action_dist import Categorical from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() VTraceFromLogitsReturns = collections.namedtuple("VTraceFromLogitsReturns", [ "vs", "pg_advantages", "log_rhos", "behaviour_action_log_probs", @@ -222,7 +222,7 @@ def multi_from_logits(behaviour_policy_logits, behaviour_policy_logits[i].shape.assert_has_rank(3) target_policy_logits[i].shape.assert_has_rank(3) - with tf.name_scope( + with tf1.name_scope( name, values=[ behaviour_policy_logits, target_policy_logits, actions, @@ -332,21 +332,22 @@ def from_importance_weights(log_rhos, if clip_pg_rho_threshold is not None: clip_pg_rho_threshold.shape.assert_has_rank(0) - with tf.name_scope( + with tf1.name_scope( name, values=[log_rhos, discounts, rewards, values, bootstrap_value]): - rhos = tf.exp(log_rhos) + rhos = tf.math.exp(log_rhos) if clip_rho_threshold is not None: clipped_rhos = tf.minimum( clip_rho_threshold, rhos, name="clipped_rhos") - tf.summary.histogram("clipped_rhos_1000", tf.minimum(1000.0, rhos)) - tf.summary.scalar( + tf1.summary.histogram( + "clipped_rhos_1000", tf.minimum(1000.0, rhos)) + tf1.summary.scalar( "num_of_clipped_rhos", tf.reduce_sum( tf.cast( tf.equal(clipped_rhos, clip_rho_threshold), tf.int32))) - tf.summary.scalar("size_of_clipped_rhos", tf.size(clipped_rhos)) + tf1.summary.scalar("size_of_clipped_rhos", tf.size(clipped_rhos)) else: clipped_rhos = rhos diff --git a/rllib/agents/impala/vtrace_tf_policy.py b/rllib/agents/impala/vtrace_tf_policy.py index a8baf9bbf..0237772bc 100644 --- a/rllib/agents/impala/vtrace_tf_policy.py +++ b/rllib/agents/impala/vtrace_tf_policy.py @@ -16,7 +16,7 @@ from ray.rllib.policy.tf_policy import LearningRateSchedule, \ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.tf_ops import explained_variance -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -253,10 +253,11 @@ def postprocess_trajectory(policy, def choose_optimizer(policy, config): if policy.config["opt_type"] == "adam": - return tf.train.AdamOptimizer(policy.cur_lr) + return tf1.train.AdamOptimizer(policy.cur_lr) else: - return tf.train.RMSPropOptimizer(policy.cur_lr, config["decay"], - config["momentum"], config["epsilon"]) + return tf1.train.RMSPropOptimizer( + policy.cur_lr, + config["decay"], config["momentum"], config["epsilon"]) def clip_gradients(policy, optimizer, loss): diff --git a/rllib/agents/maml/maml_tf_policy.py b/rllib/agents/maml/maml_tf_policy.py index 541cab675..f4a0a9ca5 100644 --- a/rllib/agents/maml/maml_tf_policy.py +++ b/rllib/agents/maml/maml_tf_policy.py @@ -9,7 +9,7 @@ from ray.rllib.agents.ppo.ppo_tf_policy import postprocess_ppo_gae, \ vf_preds_fetches, clip_gradients, setup_config, ValueNetworkMixin from ray.rllib.utils.framework import get_activation_fn -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -33,7 +33,7 @@ def PPOLoss(dist_class, pi_new_logp = curr_dist.logp(actions) pi_old_logp = prev_dist.logp(actions) - logp_ratio = tf.exp(pi_new_logp - pi_old_logp) + logp_ratio = tf.math.exp(pi_new_logp - pi_old_logp) if clip_loss: return tf.minimum( advantages * logp_ratio, @@ -49,10 +49,10 @@ def PPOLoss(dist_class, def vf_loss(value_fn, value_targets, vf_preds, vf_clip_param=0.1): # GAE Value Function Loss - vf_loss1 = tf.square(value_fn - value_targets) + vf_loss1 = tf.math.square(value_fn - value_targets) vf_clipped = vf_preds + tf.clip_by_value(value_fn - vf_preds, -vf_clip_param, vf_clip_param) - vf_loss2 = tf.square(vf_clipped - value_targets) + vf_loss2 = tf.math.square(vf_clipped - value_targets) vf_loss = tf.maximum(vf_loss1, vf_loss2) return vf_loss @@ -104,7 +104,7 @@ class WorkerLoss(object): vf_clip_param=vf_clip_param, vf_loss_coeff=vf_loss_coeff, clip_loss=clip_loss) - self.loss = tf.Print(self.loss, ["Worker Adapt Loss", self.loss]) + self.loss = tf1.Print(self.loss, ["Worker Adapt Loss", self.loss]) # This is the Meta-Update computation graph for main (meta-update step) @@ -230,7 +230,7 @@ class MAMLLoss(object): tf.multiply(self.cur_kl_coeff, mean_inner_kl)) self.loss = tf.reduce_mean(tf.stack(ppo_obj, axis=0)) + self.inner_kl_loss - self.loss = tf.Print( + self.loss = tf1.Print( self.loss, ["Meta-Loss", self.loss, "Inner KL", self.mean_inner_kl]) @@ -309,7 +309,7 @@ class MAMLLoss(object): def maml_loss(policy, model, dist_class, train_batch): logits, state = model.from_batch(train_batch) - policy._loss_input_dict["split"] = tf.placeholder( + policy._loss_input_dict["split"] = tf1.placeholder( tf.int32, name="Meta-Update-Splitting", shape=(policy.config["inner_adaptation_steps"] + 1, @@ -333,8 +333,8 @@ def maml_loss(policy, model, dist_class, train_batch): vf_loss_coeff=policy.config["vf_loss_coeff"], clip_loss=False) else: - policy.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, - tf.get_variable_scope().name) + policy.var_list = tf1.get_collection(tf1.GraphKeys.TRAINABLE_VARIABLES, + tf1.get_variable_scope().name) policy.loss_obj = MAMLLoss( model=model, dist_class=dist_class, @@ -380,8 +380,8 @@ class KLCoeffMixin: self.kl_coeff_val = [config["kl_coeff"] ] * config["inner_adaptation_steps"] self.kl_target = self.config["kl_target"] - self.kl_coeff = tf.get_variable( - initializer=tf.constant_initializer(self.kl_coeff_val), + self.kl_coeff = tf1.get_variable( + initializer=tf.keras.initializers.Constant(self.kl_coeff_val), name="kl_coeff", shape=(config["inner_adaptation_steps"]), trainable=False, @@ -404,8 +404,8 @@ def maml_optimizer_fn(policy, config): Meta-Policy uses Adam optimizer for meta-update """ if not config["worker_index"]: - return tf.train.AdamOptimizer(learning_rate=config["lr"]) - return tf.train.GradientDescentOptimizer(learning_rate=config["inner_lr"]) + return tf1.train.AdamOptimizer(learning_rate=config["lr"]) + return tf1.train.GradientDescentOptimizer(learning_rate=config["inner_lr"]) def setup_mixins(policy, obs_space, action_space, config): diff --git a/rllib/agents/marwil/marwil_tf_policy.py b/rllib/agents/marwil/marwil_tf_policy.py index 947142f1d..cb00f88c0 100644 --- a/rllib/agents/marwil/marwil_tf_policy.py +++ b/rllib/agents/marwil/marwil_tf_policy.py @@ -6,7 +6,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class ValueNetworkMixin: @@ -37,13 +37,13 @@ class ReweightedImitationLoss: # advantage estimation adv = cumulative_rewards - state_values # update averaged advantage norm - update_adv_norm = tf.assign_add( + update_adv_norm = tf1.assign_add( ref=policy._ma_adv_norm, value=1e-6 * ( tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm)) # exponentially weighted advantages - with tf.control_dependencies([update_adv_norm]): + with tf1.control_dependencies([update_adv_norm]): exp_advs = tf.math.exp(beta * tf.math.divide( adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm))) @@ -125,7 +125,7 @@ def setup_mixins(policy, obs_space, action_space, config): ValueNetworkMixin.__init__(policy) # Set up a tf-var for the moving avg (do this here to make it work with # eager mode). - policy._ma_adv_norm = tf.get_variable( + policy._ma_adv_norm = tf1.get_variable( name="moving_average_of_advantage_norm", dtype=tf.float32, initializer=100.0, diff --git a/rllib/agents/marwil/tests/test_marwil.py b/rllib/agents/marwil/tests/test_marwil.py index bc49f39ea..fa6a9a98d 100644 --- a/rllib/agents/marwil/tests/test_marwil.py +++ b/rllib/agents/marwil/tests/test_marwil.py @@ -6,7 +6,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check_compute_single_action, \ framework_iterator -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class TestMARWIL(unittest.TestCase): diff --git a/rllib/agents/pg/pg_tf_policy.py b/rllib/agents/pg/pg_tf_policy.py index 8f937a8de..88ccc2ac7 100644 --- a/rllib/agents/pg/pg_tf_policy.py +++ b/rllib/agents/pg/pg_tf_policy.py @@ -5,7 +5,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() def post_process_advantages(policy, diff --git a/rllib/agents/ppo/appo_tf_policy.py b/rllib/agents/ppo/appo_tf_policy.py index c733890ec..da908966c 100644 --- a/rllib/agents/ppo/appo_tf_policy.py +++ b/rllib/agents/ppo/appo_tf_policy.py @@ -21,7 +21,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() POLICY_SCOPE = "func" TARGET_POLICY_SCOPE = "target_func" @@ -65,7 +65,7 @@ class PPOSurrogateLoss: def reduce_mean_valid(t): return tf.reduce_mean(tf.boolean_mask(t, valid_mask)) - logp_ratio = tf.exp(actions_logp - prev_actions_logp) + logp_ratio = tf.math.exp(actions_logp - prev_actions_logp) surrogate_loss = tf.minimum( advantages * logp_ratio, @@ -170,7 +170,7 @@ class VTraceSurrogateLoss: tf.float32)) self.is_ratio = tf.clip_by_value( - tf.exp(prev_actions_logp - old_policy_actions_logp), 0.0, 2.0) + tf.math.exp(prev_actions_logp - old_policy_actions_logp), 0.0, 2.0) logp_ratio = self.is_ratio * tf.exp(actions_logp - prev_actions_logp) advantages = self.vtrace_returns.pg_advantages diff --git a/rllib/agents/ppo/ppo.py b/rllib/agents/ppo/ppo.py index 082a69696..62aa1ba7b 100644 --- a/rllib/agents/ppo/ppo.py +++ b/rllib/agents/ppo/ppo.py @@ -7,9 +7,6 @@ from ray.rllib.execution.rollout_ops import ParallelRollouts, ConcatBatches, \ StandardizeFields, SelectExperiences from ray.rllib.execution.train_ops import TrainOneStep, TrainTFMultiGPU from ray.rllib.execution.metric_ops import StandardMetricsReporting -from ray.rllib.utils.framework import try_import_tf - -tf = try_import_tf() logger = logging.getLogger(__name__) diff --git a/rllib/agents/ppo/ppo_tf_policy.py b/rllib/agents/ppo/ppo_tf_policy.py index 963d4d816..f5af4281f 100644 --- a/rllib/agents/ppo/ppo_tf_policy.py +++ b/rllib/agents/ppo/ppo_tf_policy.py @@ -10,7 +10,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -174,7 +174,7 @@ def postprocess_ppo_gae(policy, else: next_state = [] for i in range(policy.num_state_tensors()): - next_state.append([sample_batch["state_out_{}".format(i)][-1]]) + next_state.append(sample_batch["state_out_{}".format(i)][-1]) last_r = policy._value(sample_batch[SampleBatch.NEXT_OBS][-1], sample_batch[SampleBatch.ACTIONS][-1], sample_batch[SampleBatch.REWARDS][-1], @@ -206,7 +206,7 @@ class KLCoeffMixin: # KL Coefficient self.kl_coeff_val = config["kl_coeff"] self.kl_target = config["kl_target"] - self.kl_coeff = tf.get_variable( + self.kl_coeff = tf1.get_variable( initializer=tf.constant_initializer(self.kl_coeff_val), name="kl_coeff", shape=(), diff --git a/rllib/agents/ppo/ppo_torch_policy.py b/rllib/agents/ppo/ppo_torch_policy.py index f94d18d6a..2d2f8e1fc 100644 --- a/rllib/agents/ppo/ppo_torch_policy.py +++ b/rllib/agents/ppo/ppo_torch_policy.py @@ -194,7 +194,7 @@ class ValueNetworkMixin: SampleBatch.PREV_REWARDS: convert_to_torch_tensor( np.asarray([prev_reward])), "is_training": False, - }, [convert_to_torch_tensor(np.asarray(s)) for s in state], + }, [convert_to_torch_tensor(np.asarray([s])) for s in state], convert_to_torch_tensor(np.asarray([1]))) return self.model.value_function()[0] diff --git a/rllib/agents/ppo/tests/test_appo.py b/rllib/agents/ppo/tests/test_appo.py index de21398fc..7161bb83a 100644 --- a/rllib/agents/ppo/tests/test_appo.py +++ b/rllib/agents/ppo/tests/test_appo.py @@ -2,12 +2,9 @@ import unittest import ray import ray.rllib.agents.ppo as ppo -from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check_compute_single_action, \ framework_iterator -tf = try_import_tf() - class TestAPPO(unittest.TestCase): @classmethod diff --git a/rllib/agents/ppo/tests/test_ddppo.py b/rllib/agents/ppo/tests/test_ddppo.py index 25cd56c27..4c56a22ee 100644 --- a/rllib/agents/ppo/tests/test_ddppo.py +++ b/rllib/agents/ppo/tests/test_ddppo.py @@ -2,12 +2,9 @@ import unittest import ray import ray.rllib.agents.ppo as ppo -from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check_compute_single_action, \ framework_iterator -tf = try_import_tf() - class TestDDPPO(unittest.TestCase): @classmethod diff --git a/rllib/agents/ppo/tests/test_ppo.py b/rllib/agents/ppo/tests/test_ppo.py index 472689b3b..b1dec4e5a 100644 --- a/rllib/agents/ppo/tests/test_ppo.py +++ b/rllib/agents/ppo/tests/test_ppo.py @@ -13,12 +13,10 @@ from ray.rllib.models.tf.tf_action_dist import Categorical from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.models.torch.torch_action_dist import TorchCategorical from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.numpy import fc from ray.rllib.utils.test_utils import check, framework_iterator, \ check_compute_single_action -tf = try_import_tf() # Fake CartPole episode of n time steps. FAKE_BATCH = { @@ -40,7 +38,7 @@ FAKE_BATCH = { class TestPPO(unittest.TestCase): @classmethod def setUpClass(cls): - ray.init() + ray.init(local_mode=True) @classmethod def tearDownClass(cls): diff --git a/rllib/agents/sac/sac_tf_model.py b/rllib/agents/sac/sac_tf_model.py index f505ad382..e2ac33f0b 100644 --- a/rllib/agents/sac/sac_tf_model.py +++ b/rllib/agents/sac/sac_tf_model.py @@ -4,7 +4,7 @@ import numpy as np from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class SACTFModel(TFModelV2): diff --git a/rllib/agents/sac/sac_tf_policy.py b/rllib/agents/sac/sac_tf_policy.py index b68f96ee8..49076ac48 100644 --- a/rllib/agents/sac/sac_tf_policy.py +++ b/rllib/agents/sac/sac_tf_policy.py @@ -17,7 +17,7 @@ from ray.rllib.utils.error import UnsupportedSpaceException from ray.rllib.utils.framework import try_import_tf, try_import_tfp from ray.rllib.utils.tf_ops import minimize_and_clip -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() tfp = try_import_tfp() logger = logging.getLogger(__name__) @@ -138,10 +138,10 @@ def sac_actor_critic_loss(policy, model, _, train_batch): if model.discrete: # Get all action probs directly from pi and form their logp. log_pis_t = tf.nn.log_softmax(model.get_policy_output(model_out_t), -1) - policy_t = tf.exp(log_pis_t) + policy_t = tf.math.exp(log_pis_t) log_pis_tp1 = tf.nn.log_softmax( model.get_policy_output(model_out_tp1), -1) - policy_tp1 = tf.exp(log_pis_tp1) + policy_tp1 = tf.math.exp(log_pis_tp1) # Q-values. q_t = model.get_q_values(model_out_t) # Target Q-values. @@ -219,20 +219,20 @@ def sac_actor_critic_loss(policy, model, _, train_batch): policy.config["gamma"]**policy.config["n_step"] * q_tp1_best_masked) # Compute the TD-error (potentially clipped). - base_td_error = tf.abs(q_t_selected - q_t_selected_target) + base_td_error = tf.math.abs(q_t_selected - q_t_selected_target) if policy.config["twin_q"]: - twin_td_error = tf.abs(twin_q_t_selected - q_t_selected_target) + twin_td_error = tf.math.abs(twin_q_t_selected - q_t_selected_target) td_error = 0.5 * (base_td_error + twin_td_error) else: td_error = base_td_error critic_loss = [ - tf.losses.mean_squared_error( + tf1.losses.mean_squared_error( labels=q_t_selected_target, predictions=q_t_selected, weights=0.5) ] if policy.config["twin_q"]: critic_loss.append( - tf.losses.mean_squared_error( + tf1.losses.mean_squared_error( labels=q_t_selected_target, predictions=twin_q_t_selected, weights=0.5)) @@ -274,7 +274,7 @@ def sac_actor_critic_loss(policy, model, _, train_batch): # in a custom apply op we handle the losses separately, but return them # combined in one loss for now - return actor_loss + tf.add_n(critic_loss) + alpha_loss + return actor_loss + tf.math.add_n(critic_loss) + alpha_loss def gradients(policy, optimizer, loss): @@ -358,7 +358,7 @@ def apply_gradients(policy, optimizer, grads_and_vars): alpha_apply_ops = policy._alpha_optimizer.apply_gradients( policy._alpha_grads_and_vars, - global_step=tf.train.get_or_create_global_step()) + global_step=tf1.train.get_or_create_global_step()) return tf.group([actor_apply_ops, alpha_apply_ops] + critic_apply_ops) @@ -381,20 +381,20 @@ def stats(policy, train_batch): class ActorCriticOptimizerMixin: def __init__(self, config): # create global step for counting the number of update operations - self.global_step = tf.train.get_or_create_global_step() + self.global_step = tf1.train.get_or_create_global_step() # use separate optimizers for actor & critic - self._actor_optimizer = tf.train.AdamOptimizer( + self._actor_optimizer = tf1.train.AdamOptimizer( learning_rate=config["optimization"]["actor_learning_rate"]) self._critic_optimizer = [ - tf.train.AdamOptimizer( + tf1.train.AdamOptimizer( learning_rate=config["optimization"]["critic_learning_rate"]) ] if config["twin_q"]: self._critic_optimizer.append( - tf.train.AdamOptimizer(learning_rate=config["optimization"][ + tf1.train.AdamOptimizer(learning_rate=config["optimization"][ "critic_learning_rate"])) - self._alpha_optimizer = tf.train.AdamOptimizer( + self._alpha_optimizer = tf1.train.AdamOptimizer( learning_rate=config["optimization"]["entropy_learning_rate"]) diff --git a/rllib/agents/sac/tests/test_sac.py b/rllib/agents/sac/tests/test_sac.py index 134127d62..b941b974a 100644 --- a/rllib/agents/sac/tests/test_sac.py +++ b/rllib/agents/sac/tests/test_sac.py @@ -11,13 +11,12 @@ from ray.rllib.models.tf.tf_action_dist import SquashedGaussian from ray.rllib.models.torch.torch_action_dist import TorchSquashedGaussian from ray.rllib.execution.replay_buffer import LocalReplayBuffer from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.utils.framework import try_import_tf, try_import_torch +from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.numpy import fc, relu from ray.rllib.utils.test_utils import check, check_compute_single_action, \ framework_iterator from ray.rllib.utils.torch_ops import convert_to_torch_tensor -tf = try_import_tf() torch, _ = try_import_torch() diff --git a/rllib/agents/trainer.py b/rllib/agents/trainer.py index 6a99bc9c7..74f272f87 100644 --- a/rllib/agents/trainer.py +++ b/rllib/agents/trainer.py @@ -35,7 +35,7 @@ from ray.tune.resources import Resources from ray.tune.logger import Logger, UnifiedLogger from ray.tune.result import DEFAULT_RESULTS_DIR -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -595,12 +595,12 @@ class Trainer(Trainable): self.config.pop("eager") # Enable eager/tracing support. - if tf and self.config["framework"] == "tfe": - if not tf.executing_eagerly(): - tf.enable_eager_execution() + if tf1 and self.config["framework"] == "tfe": + if not tf1.executing_eagerly(): + tf1.enable_eager_execution() logger.info("Executing eagerly, with eager_tracing={}".format( self.config["eager_tracing"])) - if tf and not tf.executing_eagerly() and \ + if tf1 and not tf1.executing_eagerly() and \ self.config["framework"] != "torch": logger.info("Tip: set framework=tfe or the --eager flag to enable " "TensorFlow eager execution") @@ -634,8 +634,8 @@ class Trainer(Trainable): logging.getLogger("ray.rllib").setLevel(self.config["log_level"]) def get_scope(): - if tf and not tf.executing_eagerly(): - return tf.Graph().as_default() + if tf1 and not tf1.executing_eagerly(): + return tf1.Graph().as_default() else: return open(os.devnull) # fake a no-op scope diff --git a/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py b/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py index fa0345455..e5bd8c825 100644 --- a/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py +++ b/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py @@ -12,14 +12,13 @@ from ray.rllib.execution.metric_ops import StandardMetricsReporting from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.model import restore_original_dimensions from ray.rllib.models.torch.torch_action_dist import TorchCategorical -from ray.rllib.utils.framework import try_import_tf, try_import_torch +from ray.rllib.utils.framework import try_import_torch from ray.tune.registry import ENV_CREATOR, _global_registry from ray.rllib.contrib.alpha_zero.core.alpha_zero_policy import AlphaZeroPolicy from ray.rllib.contrib.alpha_zero.core.mcts import MCTS from ray.rllib.contrib.alpha_zero.core.ranked_rewards import get_r2_env_wrapper -tf = try_import_tf() torch, nn = try_import_torch() logger = logging.getLogger(__name__) diff --git a/rllib/contrib/maddpg/maddpg_policy.py b/rllib/contrib/maddpg/maddpg_policy.py index 2d86dffe0..e6977b6e7 100644 --- a/rllib/contrib/maddpg/maddpg_policy.py +++ b/rllib/contrib/maddpg/maddpg_policy.py @@ -15,7 +15,7 @@ import numpy as np logger = logging.getLogger(__name__) -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() tfp = try_import_tfp() @@ -49,7 +49,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): # _____ Initial Configuration config = dict(ray.rllib.contrib.maddpg.DEFAULT_CONFIG, **config) self.config = config - self.global_step = tf.train.get_or_create_global_step() + self.global_step = tf1.train.get_or_create_global_step() # FIXME: Get done from info is required since agentwise done is not # supported now. @@ -88,7 +88,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): # Placeholders for policy evaluation and updates def _make_ph_n(space_n, name=""): return [ - tf.placeholder( + tf1.placeholder( tf.float32, shape=(None, ) + space.shape, name=name + "_%d" % i) for i, space in enumerate(space_n) @@ -98,9 +98,9 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): act_ph_n = _make_ph_n(act_space_n, "actions") new_obs_ph_n = _make_ph_n(obs_space_n, "new_obs") new_act_ph_n = _make_ph_n(act_space_n, "new_actions") - rew_ph = tf.placeholder( + rew_ph = tf1.placeholder( tf.float32, shape=None, name="rewards_{}".format(agent_id)) - done_ph = tf.placeholder( + done_ph = tf1.placeholder( tf.float32, shape=None, name="dones_{}".format(agent_id)) if config["use_local_critic"]: @@ -190,12 +190,12 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): # _____ Optimizers self.optimizers = { - "critic": tf.train.AdamOptimizer(config["critic_lr"]), - "actor": tf.train.AdamOptimizer(config["actor_lr"]) + "critic": tf1.train.AdamOptimizer(config["critic_lr"]), + "actor": tf1.train.AdamOptimizer(config["actor_lr"]) } # _____ Build variable update ops. - self.tau = tf.placeholder_with_default( + self.tau = tf1.placeholder_with_default( config["tau"], shape=(), name="tau") def _make_target_update_op(vs, target_vs, tau): @@ -213,7 +213,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): for v in variables.values(): vs += v phs = [ - tf.placeholder( + tf1.placeholder( tf.float32, shape=v.get_shape(), name=v.name.split(":")[0] + "_ph") for v in vs @@ -230,7 +230,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): # _____ TensorFlow Initialization - self.sess = tf.get_default_session() + self.sess = tf1.get_default_session() def _make_loss_inputs(placeholders): return [(ph.name.split("/")[-1].split(":")[0], ph) @@ -251,7 +251,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): loss_inputs=loss_inputs, dist_inputs=actor_feature) - self.sess.run(tf.global_variables_initializer()) + self.sess.run(tf1.global_variables_initializer()) # Hard initial update self.update_target(1.0) @@ -280,8 +280,8 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): critic_apply_op = self.optimizers["critic"].apply_gradients( self.gvs["critic"]) - with tf.control_dependencies([tf.assign_add(self.global_step, 1)]): - with tf.control_dependencies([critic_apply_op]): + with tf1.control_dependencies([tf1.assign_add(self.global_step, 1)]): + with tf1.control_dependencies([critic_apply_op]): actor_apply_op = self.optimizers["actor"].apply_gradients( self.gvs["actor"]) @@ -324,7 +324,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): hiddens, activation=None, scope=None): - with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope: + with tf1.variable_scope(scope, reuse=tf1.AUTO_REUSE) as scope: if use_state_preprocessor: model_n = [ ModelCatalog.get_model({ @@ -341,11 +341,12 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): out = tf.concat(obs_n + act_n, axis=1) for hidden in hiddens: - out = tf.layers.dense(out, units=hidden, activation=activation) + out = tf1.layers.dense( + out, units=hidden, activation=activation) feature = out - out = tf.layers.dense(feature, units=1, activation=None) + out = tf1.layers.dense(feature, units=1, activation=None) - return out, feature, model_n, tf.global_variables(scope.name) + return out, feature, model_n, tf1.global_variables(scope.name) def _build_actor_network(self, obs, @@ -355,7 +356,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): hiddens, activation=None, scope=None): - with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope: + with tf1.variable_scope(scope, reuse=tf1.AUTO_REUSE) as scope: if use_state_preprocessor: model = ModelCatalog.get_model({ "obs": obs, @@ -367,13 +368,14 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): out = obs for hidden in hiddens: - out = tf.layers.dense(out, units=hidden, activation=activation) - feature = tf.layers.dense( + out = tf1.layers.dense( + out, units=hidden, activation=activation) + feature = tf1.layers.dense( out, units=act_space.shape[0], activation=None) sampler = tfp.distributions.RelaxedOneHotCategorical( temperature=1.0, logits=feature).sample() - return sampler, feature, model, tf.global_variables(scope.name) + return sampler, feature, model, tf1.global_variables(scope.name) def update_target(self, tau=None): if tau is not None: diff --git a/rllib/evaluation/rollout_worker.py b/rllib/evaluation/rollout_worker.py index d61e91373..157249976 100644 --- a/rllib/evaluation/rollout_worker.py +++ b/rllib/evaluation/rollout_worker.py @@ -50,7 +50,7 @@ if TYPE_CHECKING: # Generic type var for foreach_* methods. T = TypeVar("T") -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() logger = logging.getLogger(__name__) @@ -283,12 +283,12 @@ class RolloutWorker(ParallelIteratorWorker): ParallelIteratorWorker.__init__(self, gen_rollouts, False) policy_config: TrainerConfigDict = policy_config or {} - if (tf and policy_config.get("framework") == "tfe" + if (tf1 and policy_config.get("framework") == "tfe" and not policy_config.get("no_eager_on_workers") # This eager check is necessary for certain all-framework tests # that use tf's eager_mode() context generator. - and not tf.executing_eagerly()): - tf.enable_eager_execution() + and not tf1.executing_eagerly()): + tf1.enable_eager_execution() if log_level: logging.getLogger("ray.rllib").setLevel(log_level) @@ -382,21 +382,21 @@ class RolloutWorker(ParallelIteratorWorker): torch.manual_seed(seed) except AssertionError: logger.info("Could not seed torch") - if _has_tensorflow_graph(policy_dict) and not (tf and - tf.executing_eagerly()): - if not tf: + if _has_tensorflow_graph(policy_dict) and not ( + tf1 and tf1.executing_eagerly()): + if not tf1: raise ImportError("Could not import tensorflow") - with tf.Graph().as_default(): + with tf1.Graph().as_default(): if tf_session_creator: self.tf_sess = tf_session_creator() else: - self.tf_sess = tf.Session( - config=tf.ConfigProto( - gpu_options=tf.GPUOptions(allow_growth=True))) + self.tf_sess = tf1.Session( + config=tf1.ConfigProto( + gpu_options=tf1.GPUOptions(allow_growth=True))) with self.tf_sess.as_default(): # set graph-level seed if seed is not None: - tf.set_random_seed(seed) + tf1.set_random_seed(seed) self.policy_map, self.preprocessors = \ self._build_policy_map(policy_dict, policy_config) if (ray.is_initialized() @@ -406,7 +406,7 @@ class RolloutWorker(ParallelIteratorWorker): "Creating policy evaluation worker {}".format( worker_index) + " on CPU (please ignore any CUDA init errors)") - elif not tf.test.is_gpu_available(): + elif not tf1.test.is_gpu_available(): raise RuntimeError( "GPUs were assigned to this worker by Ray, but " "TensorFlow reports GPU acceleration is disabled. " @@ -956,7 +956,7 @@ class RolloutWorker(ParallelIteratorWorker): "Found raw Tuple|Dict space as input to policy. " "Please preprocess these observations with a " "Tuple|DictFlatteningPreprocessor.") - if tf and tf.executing_eagerly(): + if tf1 and tf1.executing_eagerly(): if hasattr(cls, "as_eager"): cls = cls.as_eager() if policy_config["eager_tracing"]: @@ -966,8 +966,8 @@ class RolloutWorker(ParallelIteratorWorker): else: raise ValueError("This policy does not support eager " "execution: {}".format(cls)) - if tf: - with tf.variable_scope(name): + if tf1: + with tf1.variable_scope(name): policy_map[name] = cls(obs_space, act_space, merged_conf) else: policy_map[name] = cls(obs_space, act_space, merged_conf) diff --git a/rllib/evaluation/worker_set.py b/rllib/evaluation/worker_set.py index ed79b6444..46f6a86ff 100644 --- a/rllib/evaluation/worker_set.py +++ b/rllib/evaluation/worker_set.py @@ -14,7 +14,7 @@ from ray.rllib.utils import merge_dicts from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.types import PolicyID, TrainerConfigDict, EnvType -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -202,8 +202,8 @@ class WorkerSet: def session_creator(): logger.debug("Creating TF session {}".format( config["tf_session_args"])) - return tf.Session( - config=tf.ConfigProto(**config["tf_session_args"])) + return tf1.Session( + config=tf1.ConfigProto(**config["tf_session_args"])) if isinstance(config["input"], FunctionType): input_creator = config["input"] diff --git a/rllib/examples/attention_net.py b/rllib/examples/attention_net.py index 02c8d96b8..8a4089dd8 100644 --- a/rllib/examples/attention_net.py +++ b/rllib/examples/attention_net.py @@ -11,7 +11,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check_learning_achieved from ray.tune import registry -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() parser = argparse.ArgumentParser() parser.add_argument("--run", type=str, default="PPO") diff --git a/rllib/examples/attention_net_supervised.py b/rllib/examples/attention_net_supervised.py index 642bed2db..0282a6195 100644 --- a/rllib/examples/attention_net_supervised.py +++ b/rllib/examples/attention_net_supervised.py @@ -4,7 +4,7 @@ import numpy as np from rllib.models.tf.attention_net import TrXLNet from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() def bit_shift_generator(seq_length, shift, batch_size): diff --git a/rllib/examples/batch_norm_model.py b/rllib/examples/batch_norm_model.py index fa41a0add..5159a166f 100644 --- a/rllib/examples/batch_norm_model.py +++ b/rllib/examples/batch_norm_model.py @@ -10,7 +10,7 @@ from ray.rllib.models import ModelCatalog from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check_learning_achieved -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() parser = argparse.ArgumentParser() parser.add_argument("--run", type=str, default="PPO") diff --git a/rllib/examples/centralized_critic.py b/rllib/examples/centralized_critic.py index 260d8494e..42c086dc1 100644 --- a/rllib/examples/centralized_critic.py +++ b/rllib/examples/centralized_critic.py @@ -39,7 +39,7 @@ from ray.rllib.utils.test_utils import check_learning_achieved from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable from ray.rllib.utils.torch_ops import convert_to_torch_tensor -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() OPPONENT_OBS = "opponent_obs" diff --git a/rllib/examples/custom_env.py b/rllib/examples/custom_env.py index 8963f4882..bde0f473c 100644 --- a/rllib/examples/custom_env.py +++ b/rllib/examples/custom_env.py @@ -23,7 +23,7 @@ from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.test_utils import check_learning_achieved -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() parser = argparse.ArgumentParser() diff --git a/rllib/examples/custom_keras_model.py b/rllib/examples/custom_keras_model.py index aac7e41d4..9a9279670 100644 --- a/rllib/examples/custom_keras_model.py +++ b/rllib/examples/custom_keras_model.py @@ -12,7 +12,7 @@ from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.models.tf.visionnet import VisionNetwork as MyVisionNetwork from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() parser = argparse.ArgumentParser() parser.add_argument("--run", type=str, default="DQN") # Try PG, PPO, DQN diff --git a/rllib/examples/custom_loss.py b/rllib/examples/custom_loss.py index 9d3d90348..0f47632d9 100644 --- a/rllib/examples/custom_loss.py +++ b/rllib/examples/custom_loss.py @@ -21,7 +21,7 @@ from ray.rllib.examples.models.custom_loss_model import CustomLossModel, \ from ray.rllib.models import ModelCatalog from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() parser = argparse.ArgumentParser() parser.add_argument("--torch", action="store_true") diff --git a/rllib/examples/custom_tf_policy.py b/rllib/examples/custom_tf_policy.py index e2a919273..6e1a968e3 100644 --- a/rllib/examples/custom_tf_policy.py +++ b/rllib/examples/custom_tf_policy.py @@ -7,7 +7,7 @@ from ray.rllib.evaluation.postprocessing import discount from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() parser = argparse.ArgumentParser() parser.add_argument("--stop-iters", type=int, default=200) diff --git a/rllib/examples/eager_execution.py b/rllib/examples/eager_execution.py index f3f05328e..fb603b23b 100644 --- a/rllib/examples/eager_execution.py +++ b/rllib/examples/eager_execution.py @@ -11,7 +11,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check_learning_achieved -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() parser = argparse.ArgumentParser() parser.add_argument("--stop-iters", type=int, default=200) diff --git a/rllib/examples/export/cartpole_dqn_export.py b/rllib/examples/export/cartpole_dqn_export.py index 46ab741a9..8b315dd79 100644 --- a/rllib/examples/export/cartpole_dqn_export.py +++ b/rllib/examples/export/cartpole_dqn_export.py @@ -6,7 +6,7 @@ import ray from ray.rllib.agents.registry import get_agent_class from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() ray.init(num_cpus=10) @@ -25,14 +25,14 @@ def train_and_export(algo_name, num_steps, model_dir, ckpt_dir, prefix): def restore_saved_model(export_dir): signature_key = \ - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY - g = tf.Graph() + tf1.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY + g = tf1.Graph() with g.as_default(): - with tf.Session(graph=g) as sess: + with tf1.Session(graph=g) as sess: meta_graph_def = \ - tf.saved_model.load(sess, - [tf.saved_model.tag_constants.SERVING], - export_dir) + tf1.saved_model.load(sess, + [tf1.saved_model.tag_constants.SERVING], + export_dir) print("Model restored!") print("Signature Def Information:") print(meta_graph_def.signature_def[signature_key]) @@ -41,13 +41,13 @@ def restore_saved_model(export_dir): def restore_checkpoint(export_dir, prefix): - sess = tf.Session() + sess = tf1.Session() meta_file = "%s.meta" % prefix - saver = tf.train.import_meta_graph(os.path.join(export_dir, meta_file)) + saver = tf1.train.import_meta_graph(os.path.join(export_dir, meta_file)) saver.restore(sess, os.path.join(export_dir, prefix)) print("Checkpoint restored!") print("Variables Information:") - for v in tf.trainable_variables(): + for v in tf1.trainable_variables(): value = sess.run(v) print(v.name, value) diff --git a/rllib/examples/mobilenet_v2_with_lstm.py b/rllib/examples/mobilenet_v2_with_lstm.py index e0f066a13..4f8ba7edc 100644 --- a/rllib/examples/mobilenet_v2_with_lstm.py +++ b/rllib/examples/mobilenet_v2_with_lstm.py @@ -13,7 +13,7 @@ from ray.rllib.examples.models.mobilenet_v2_with_lstm_models import \ from ray.rllib.models import ModelCatalog from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() cnn_shape = (4, 4, 3) # The torch version of MobileNetV2 does channels first. diff --git a/rllib/examples/models/autoregressive_action_dist.py b/rllib/examples/models/autoregressive_action_dist.py index 5385e0b15..929a7d782 100644 --- a/rllib/examples/models/autoregressive_action_dist.py +++ b/rllib/examples/models/autoregressive_action_dist.py @@ -3,7 +3,7 @@ from ray.rllib.models.torch.torch_action_dist import TorchCategorical, \ TorchDistributionWrapper from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() diff --git a/rllib/examples/models/autoregressive_action_model.py b/rllib/examples/models/autoregressive_action_model.py index 06f32f78a..5602f9b52 100644 --- a/rllib/examples/models/autoregressive_action_model.py +++ b/rllib/examples/models/autoregressive_action_model.py @@ -7,7 +7,7 @@ from ray.rllib.models.torch.misc import SlimFC from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() diff --git a/rllib/examples/models/batch_norm_model.py b/rllib/examples/models/batch_norm_model.py index 762793de2..5091415ec 100644 --- a/rllib/examples/models/batch_norm_model.py +++ b/rllib/examples/models/batch_norm_model.py @@ -9,7 +9,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() @@ -39,27 +39,27 @@ class BatchNormModel(TFModelV2): def forward(self, input_dict, state, seq_lens): last_layer = input_dict["obs"] hiddens = [256, 256] - with tf.variable_scope("model", reuse=tf.AUTO_REUSE): + with tf1.variable_scope("model", reuse=tf1.AUTO_REUSE): for i, size in enumerate(hiddens): - last_layer = tf.layers.dense( + last_layer = tf1.layers.dense( last_layer, size, kernel_initializer=normc_initializer(1.0), activation=tf.nn.tanh, name="fc{}".format(i)) # Add a batch norm layer - last_layer = tf.layers.batch_normalization( + last_layer = tf1.layers.batch_normalization( last_layer, training=input_dict["is_training"], name="bn_{}".format(i)) - output = tf.layers.dense( + output = tf1.layers.dense( last_layer, self.num_outputs, kernel_initializer=normc_initializer(0.01), activation=None, name="out") - self._value_out = tf.layers.dense( + self._value_out = tf1.layers.dense( last_layer, 1, kernel_initializer=normc_initializer(1.0), @@ -67,8 +67,8 @@ class BatchNormModel(TFModelV2): name="vf") if not self._registered: self.register_variables( - tf.get_collection( - tf.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+")) + tf1.get_collection( + tf1.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+")) self._registered = True return output, [] diff --git a/rllib/examples/models/centralized_critic_models.py b/rllib/examples/models/centralized_critic_models.py index 030ab66fe..276f42381 100644 --- a/rllib/examples/models/centralized_critic_models.py +++ b/rllib/examples/models/centralized_critic_models.py @@ -9,7 +9,7 @@ from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() diff --git a/rllib/examples/models/custom_loss_model.py b/rllib/examples/models/custom_loss_model.py index 0a536c7d2..a0fa41c2b 100644 --- a/rllib/examples/models/custom_loss_model.py +++ b/rllib/examples/models/custom_loss_model.py @@ -10,7 +10,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.offline import JsonReader -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() @@ -73,7 +73,7 @@ class DeprecatedCustomLossModelV1(Model): def _build_layers_v2(self, input_dict, num_outputs, options): self.obs_in = input_dict["obs"] - with tf.variable_scope("shared", reuse=tf.AUTO_REUSE): + with tf1.variable_scope("shared", reuse=tf1.AUTO_REUSE): self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space, self.action_space, num_outputs, options) diff --git a/rllib/examples/models/eager_model.py b/rllib/examples/models/eager_model.py index 6e2d44c04..a20236711 100644 --- a/rllib/examples/models/eager_model.py +++ b/rllib/examples/models/eager_model.py @@ -6,7 +6,7 @@ from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class EagerModel(TFModelV2): @@ -34,7 +34,7 @@ class EagerModel(TFModelV2): def lambda_(x): eager_out = tf.py_function(self.forward_eager, [x], tf.float32) - with tf.control_dependencies([eager_out]): + with tf1.control_dependencies([eager_out]): eager_out.set_shape(x.shape) return eager_out diff --git a/rllib/examples/models/fast_model.py b/rllib/examples/models/fast_model.py index 7e6528db7..c377608d3 100644 --- a/rllib/examples/models/fast_model.py +++ b/rllib/examples/models/fast_model.py @@ -5,7 +5,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() @@ -25,11 +25,11 @@ class FastModel(TFModelV2): @override(ModelV2) def forward(self, input_dict, state, seq_lens): - with tf.variable_scope("model", reuse=tf.AUTO_REUSE): - bias = tf.get_variable( + with tf1.variable_scope("model", reuse=tf1.AUTO_REUSE): + bias = tf1.get_variable( dtype=tf.float32, name="bias", - initializer=tf.zeros_initializer, + initializer=tf.keras.initializers.Zeros(), shape=()) output = bias + \ tf.zeros([tf.shape(input_dict["obs"])[0], self.num_outputs]) @@ -37,8 +37,8 @@ class FastModel(TFModelV2): if not self._registered: self.register_variables( - tf.get_collection( - tf.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+")) + tf1.get_collection( + tf1.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+")) self._registered = True return output, [] diff --git a/rllib/examples/models/mobilenet_v2_with_lstm_models.py b/rllib/examples/models/mobilenet_v2_with_lstm_models.py index 3bc7052be..5b0aa8248 100644 --- a/rllib/examples/models/mobilenet_v2_with_lstm_models.py +++ b/rllib/examples/models/mobilenet_v2_with_lstm_models.py @@ -7,7 +7,7 @@ from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() diff --git a/rllib/examples/models/parametric_actions_model.py b/rllib/examples/models/parametric_actions_model.py index f0c62935d..225399286 100644 --- a/rllib/examples/models/parametric_actions_model.py +++ b/rllib/examples/models/parametric_actions_model.py @@ -9,7 +9,7 @@ from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.numpy import LARGE_INTEGER -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() diff --git a/rllib/examples/models/rnn_model.py b/rllib/examples/models/rnn_model.py index 55d6f940b..4b3d3db9e 100644 --- a/rllib/examples/models/rnn_model.py +++ b/rllib/examples/models/rnn_model.py @@ -7,7 +7,7 @@ from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() diff --git a/rllib/examples/models/rnn_spy_model.py b/rllib/examples/models/rnn_spy_model.py index 18f06f202..1b1d95f1e 100644 --- a/rllib/examples/models/rnn_spy_model.py +++ b/rllib/examples/models/rnn_spy_model.py @@ -8,13 +8,15 @@ from ray.rllib.models.tf.recurrent_net import RecurrentNetwork from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class SpyLayer(tf.keras.layers.Layer): """A keras Layer, which intercepts its inputs and stored them as pickled. """ + output = np.array(0, dtype=np.int64) + def __init__(self, num_outputs, **kwargs): super().__init__(**kwargs) @@ -26,7 +28,7 @@ class SpyLayer(tf.keras.layers.Layer): """ del kwargs - spy_fn = tf.py_func( + spy_fn = tf1.py_func( self.spy, [ inputs[0], # observations @@ -36,11 +38,11 @@ class SpyLayer(tf.keras.layers.Layer): inputs[5], # h_out inputs[6], # c_out ], - tf.int64, + tf.int64, # Must match SpyLayer.output's type. stateful=True) # Compute outputs - with tf.control_dependencies([spy_fn]): + with tf1.control_dependencies([spy_fn]): return self.dense(inputs[1]) @staticmethod @@ -48,7 +50,8 @@ class SpyLayer(tf.keras.layers.Layer): """The actual spy operation: Store inputs in internal_kv.""" if len(inputs) == 1: - return 0 # don't capture inference inputs + # don't capture inference inputs + return SpyLayer.output # TF runs this function in an isolated context, so we have to use # redis to communicate back to our suite ray.experimental.internal_kv._internal_kv_put( @@ -61,7 +64,7 @@ class SpyLayer(tf.keras.layers.Layer): }), overwrite=True) RNNSpyModel.capture_index += 1 - return 0 + return SpyLayer.output class RNNSpyModel(RecurrentNetwork): diff --git a/rllib/examples/models/shared_weights_model.py b/rllib/examples/models/shared_weights_model.py index 137396a2f..24e9a041f 100644 --- a/rllib/examples/models/shared_weights_model.py +++ b/rllib/examples/models/shared_weights_model.py @@ -7,7 +7,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() @@ -15,7 +15,7 @@ class SharedWeightsModel1(TFModelV2): """Example of weight sharing between two different TFModelV2s. Here, we share the variables defined in the 'shared' variable scope - by entering it explicitly with tf.AUTO_REUSE. This creates the + by entering it explicitly with tf1.AUTO_REUSE. This creates the variables for the 'fc1' layer in a global scope called 'shared' (outside of the Policy's normal variable scope). """ @@ -26,9 +26,9 @@ class SharedWeightsModel1(TFModelV2): model_config, name) inputs = tf.keras.layers.Input(observation_space.shape) - with tf.variable_scope( - tf.VariableScope(tf.AUTO_REUSE, "shared"), - reuse=tf.AUTO_REUSE, + with tf1.variable_scope( + tf1.VariableScope(tf1.AUTO_REUSE, "shared"), + reuse=tf1.AUTO_REUSE, auxiliary_name_scope=False): last_layer = tf.keras.layers.Dense( units=64, activation=tf.nn.relu, name="fc1")(inputs) @@ -60,9 +60,9 @@ class SharedWeightsModel2(TFModelV2): inputs = tf.keras.layers.Input(observation_space.shape) # Weights shared with SharedWeightsModel1. - with tf.variable_scope( - tf.VariableScope(tf.AUTO_REUSE, "shared"), - reuse=tf.AUTO_REUSE, + with tf1.variable_scope( + tf1.VariableScope(tf1.AUTO_REUSE, "shared"), + reuse=tf1.AUTO_REUSE, auxiliary_name_scope=False): last_layer = tf.keras.layers.Dense( units=64, activation=tf.nn.relu, name="fc1")(inputs) diff --git a/rllib/examples/models/simple_rpg_model.py b/rllib/examples/models/simple_rpg_model.py index b77428745..6126ea688 100644 --- a/rllib/examples/models/simple_rpg_model.py +++ b/rllib/examples/models/simple_rpg_model.py @@ -4,7 +4,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFCNet from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() diff --git a/rllib/examples/multi_agent_cartpole.py b/rllib/examples/multi_agent_cartpole.py index c7b41c36b..64cde0f5c 100644 --- a/rllib/examples/multi_agent_cartpole.py +++ b/rllib/examples/multi_agent_cartpole.py @@ -22,7 +22,7 @@ from ray.rllib.models import ModelCatalog from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check_learning_achieved -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() parser = argparse.ArgumentParser() diff --git a/rllib/examples/rock_paper_scissors_multiagent.py b/rllib/examples/rock_paper_scissors_multiagent.py index 97e3ec4e5..f9a22a596 100644 --- a/rllib/examples/rock_paper_scissors_multiagent.py +++ b/rllib/examples/rock_paper_scissors_multiagent.py @@ -20,7 +20,7 @@ from ray.rllib.examples.policy.rock_paper_scissors_dummies import \ from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.test_utils import check_learning_achieved -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() parser = argparse.ArgumentParser() diff --git a/rllib/execution/multi_gpu_impl.py b/rllib/execution/multi_gpu_impl.py index 0771bb18b..010ee477c 100644 --- a/rllib/execution/multi_gpu_impl.py +++ b/rllib/execution/multi_gpu_impl.py @@ -5,7 +5,7 @@ from ray.util.debug import log_once from ray.rllib.utils.debug import summarize from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() # Variable scope in which created variables will be placed under TOWER_SCOPE_NAME = "tower" @@ -26,7 +26,7 @@ class LocalSyncParallelOptimizer: `load_data`, so you can make multiple passes (possibly in randomized order) over the same data once loaded. - This is similar to tf.train.SyncReplicasOptimizer, but works within a + This is similar to tf1.train.SyncReplicasOptimizer, but works within a single TensorFlow graph, i.e. implements in-graph replicated training: https://www.tensorflow.org/api_docs/python/tf/train/SyncReplicasOptimizer @@ -63,21 +63,21 @@ class LocalSyncParallelOptimizer: self.build_graph = build_graph # First initialize the shared loss network - with tf.name_scope(TOWER_SCOPE_NAME): + with tf1.name_scope(TOWER_SCOPE_NAME): self._shared_loss = build_graph(self.loss_inputs) - shared_ops = tf.get_collection( - tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name) + shared_ops = tf1.get_collection( + tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name) # Then setup the per-device loss graphs that use the shared weights - self._batch_index = tf.placeholder(tf.int32, name="batch_index") + self._batch_index = tf1.placeholder(tf.int32, name="batch_index") # Dynamic batch size, which may be shrunk if there isn't enough data - self._per_device_batch_size = tf.placeholder( + self._per_device_batch_size = tf1.placeholder( tf.int32, name="per_device_batch_size") self._loaded_per_device_batch_size = max_per_device_batch_size # When loading RNN input, we dynamically determine the max seq len - self._max_seq_len = tf.placeholder(tf.int32, name="max_seq_len") + self._max_seq_len = tf1.placeholder(tf.int32, name="max_seq_len") self._loaded_max_seq_len = 1 # Split on the CPU in case the data doesn't fit in GPU memory. @@ -103,15 +103,15 @@ class LocalSyncParallelOptimizer: # gather update ops for any batch norm layers. TODO(ekl) here we will # use all the ops found which won't work for DQN / DDPG, but those # aren't supported with multi-gpu right now anyways. - self._update_ops = tf.get_collection( - tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name) + self._update_ops = tf1.get_collection( + tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name) for op in shared_ops: self._update_ops.remove(op) # only care about tower update ops if self._update_ops: logger.debug("Update ops to run on apply gradient: {}".format( self._update_ops)) - with tf.control_dependencies(self._update_ops): + with tf1.control_dependencies(self._update_ops): self._train_op = self.optimizer.apply_gradients(avg) def load_data(self, sess, inputs, state_inputs): @@ -265,11 +265,11 @@ class LocalSyncParallelOptimizer: def _setup_device(self, device, device_input_placeholders, num_data_in): assert num_data_in <= len(device_input_placeholders) with tf.device(device): - with tf.name_scope(TOWER_SCOPE_NAME): + with tf1.name_scope(TOWER_SCOPE_NAME): device_input_batches = [] device_input_slices = [] for i, ph in enumerate(device_input_placeholders): - current_batch = tf.Variable( + current_batch = tf1.Variable( ph, trainable=False, validate_shape=False, diff --git a/rllib/execution/multi_gpu_learner.py b/rllib/execution/multi_gpu_learner.py index 5d2d2c220..4e1b2a342 100644 --- a/rllib/execution/multi_gpu_learner.py +++ b/rllib/execution/multi_gpu_learner.py @@ -13,7 +13,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.timer import TimerStat -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -84,14 +84,15 @@ class TFMultiGPULearner(LearnerThread): self.par_opt = [] with self.local_worker.tf_sess.graph.as_default(): with self.local_worker.tf_sess.as_default(): - with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE): + with tf1.variable_scope( + DEFAULT_POLICY_ID, reuse=tf1.AUTO_REUSE): if self.policy._state_inputs: rnn_inputs = self.policy._state_inputs + [ self.policy._seq_lens ] else: rnn_inputs = [] - adam = tf.train.AdamOptimizer(self.lr) + adam = tf1.train.AdamOptimizer(self.lr) for _ in range(num_data_loader_buffers): self.par_opt.append( LocalSyncParallelOptimizer( @@ -103,7 +104,7 @@ class TFMultiGPULearner(LearnerThread): self.policy.copy)) self.sess = self.local_worker.tf_sess - self.sess.run(tf.global_variables_initializer()) + self.sess.run(tf1.global_variables_initializer()) self.idle_optimizers = queue.Queue() self.ready_optimizers = queue.Queue() diff --git a/rllib/execution/train_ops.py b/rllib/execution/train_ops.py index f4e794740..05cb942c7 100644 --- a/rllib/execution/train_ops.py +++ b/rllib/execution/train_ops.py @@ -20,7 +20,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.sgd import do_minibatch_sgd, averaged from ray.rllib.utils.types import PolicyID, SampleBatchType -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -137,7 +137,7 @@ class TrainTFMultiGPU: with self.workers.local_worker().tf_sess.as_default(): for policy_id in self.policies: policy = self.workers.local_worker().get_policy(policy_id) - with tf.variable_scope(policy_id, reuse=tf.AUTO_REUSE): + with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE): if policy._state_inputs: rnn_inputs = policy._state_inputs + [ policy._seq_lens @@ -152,7 +152,7 @@ class TrainTFMultiGPU: self.per_device_batch_size, policy.copy)) self.sess = self.workers.local_worker().tf_sess - self.sess.run(tf.global_variables_initializer()) + self.sess.run(tf1.global_variables_initializer()) def __call__(self, samples: SampleBatchType) -> (SampleBatchType, List[dict]): diff --git a/rllib/models/catalog.py b/rllib/models/catalog.py index 652c1e65e..8992f8065 100644 --- a/rllib/models/catalog.py +++ b/rllib/models/catalog.py @@ -27,7 +27,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.spaces.simplex import Simplex from ray.rllib.utils.spaces.space_utils import flatten_space -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() tree = try_import_tree() logger = logging.getLogger(__name__) @@ -257,7 +257,7 @@ class ModelCatalog: dtype, shape = ModelCatalog.get_action_shape(action_space) - return tf.placeholder(dtype, shape=shape, name=name) + return tf1.placeholder(dtype, shape=shape, name=name) @staticmethod @DeveloperAPI diff --git a/rllib/models/model.py b/rllib/models/model.py index ffb56c5eb..8ff85633f 100644 --- a/rllib/models/model.py +++ b/rllib/models/model.py @@ -8,7 +8,7 @@ from ray.rllib.utils.annotations import PublicAPI, DeveloperAPI from ray.rllib.utils.deprecation import deprecation_warning from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() logger = logging.getLogger(__name__) @@ -38,13 +38,13 @@ class Model: self.action_space = action_space self.num_outputs = num_outputs self.options = options - self.scope = tf.get_variable_scope() - self.session = tf.get_default_session() + self.scope = tf1.get_variable_scope() + self.session = tf1.get_default_session() self.input_dict = input_dict if seq_lens is not None: self.seq_lens = seq_lens else: - self.seq_lens = tf.placeholder( + self.seq_lens = tf1.placeholder( dtype=tf.int32, shape=[None], name="seq_lens") self._num_outputs = num_outputs @@ -68,10 +68,10 @@ class Model: input_dict["obs"], num_outputs, options) if options.get("free_log_std", False): - log_std = tf.get_variable( + log_std = tf1.get_variable( name="log_std", shape=[num_outputs], - initializer=tf.zeros_initializer) + initializer=tf1.zeros_initializer) self.outputs = tf.concat( [self.outputs, 0.0 * self.outputs + log_std], 1) @@ -196,7 +196,7 @@ class Model: def flatten(obs, framework): """Flatten the given tensor.""" if framework == "tf": - return tf.layers.flatten(obs) + return tf1.layers.flatten(obs) elif framework == "torch": assert torch is not None return torch.flatten(obs, start_dim=1) diff --git a/rllib/models/modelv2.py b/rllib/models/modelv2.py index 25e070b99..7247d119f 100644 --- a/rllib/models/modelv2.py +++ b/rllib/models/modelv2.py @@ -13,7 +13,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch, \ from ray.rllib.utils.spaces.repeated import Repeated from ray.rllib.utils.types import ModelConfigDict -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() @@ -339,7 +339,7 @@ class NullContextManager: def flatten(obs, framework): """Flatten the given tensor.""" if framework == "tf": - return tf.layers.flatten(obs) + return tf1.layers.flatten(obs) elif framework == "torch": assert torch is not None return torch.flatten(obs, start_dim=1) diff --git a/rllib/models/tests/test_torch_modules.py b/rllib/models/tests/test_attention_nets.py similarity index 98% rename from rllib/models/tests/test_torch_modules.py rename to rllib/models/tests/test_attention_nets.py index c55579642..2065f226e 100644 --- a/rllib/models/tests/test_torch_modules.py +++ b/rllib/models/tests/test_attention_nets.py @@ -13,7 +13,7 @@ from ray.rllib.utils.framework import try_import_torch, try_import_tf from ray.rllib.utils.test_utils import framework_iterator torch, nn = try_import_torch() -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class TestModules(unittest.TestCase): @@ -144,7 +144,7 @@ class TestModules(unittest.TestCase): model = TorchMultiHeadAttention( in_dim=D_in, out_dim=D_out, num_heads=2, head_dim=32) - self.train_torch_layer(model, x, y) + self.train_torch_layer(model, x, y, num_epochs=500) else: # framework is tensorflow or tensorflow-eager @@ -165,7 +165,7 @@ class TestModules(unittest.TestCase): that it trains in a supervised setting.""" # Checks that torch and tf embedding matrices are the same - with tf.Session().as_default() as sess: + with tf1.Session().as_default() as sess: assert np.allclose( relative_position_embedding(20, 15).eval(session=sess), relative_position_embedding_torch(20, 15).numpy()) diff --git a/rllib/models/tests/test_distributions.py b/rllib/models/tests/test_distributions.py index 4531dd00f..3a4bebd13 100644 --- a/rllib/models/tests/test_distributions.py +++ b/rllib/models/tests/test_distributions.py @@ -16,7 +16,7 @@ from ray.rllib.utils.numpy import MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT, \ softmax, SMALL_NUMBER, LARGE_INTEGER from ray.rllib.utils.test_utils import check, framework_iterator -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() tree = try_import_tree() @@ -75,13 +75,13 @@ class TestDistributions(unittest.TestCase): def test_categorical(self): """Tests the Categorical ActionDistribution (tf only).""" num_samples = 100000 - logits = tf.placeholder(tf.float32, shape=(None, 10)) + logits = tf1.placeholder(tf.float32, shape=(None, 10)) z = 8 * (np.random.rand(10) - 0.5) data = np.tile(z, (num_samples, 1)) c = Categorical(logits, {}) # dummy config dict sample_op = c.sample() - sess = tf.Session() - sess.run(tf.global_variables_initializer()) + sess = tf1.Session() + sess.run(tf1.global_variables_initializer()) samples = sess.run(sample_op, feed_dict={logits: data}) counts = np.zeros(10) for sample in samples: diff --git a/rllib/models/tf/attention_net.py b/rllib/models/tf/attention_net.py index 513e99dda..c96cf6c48 100644 --- a/rllib/models/tf/attention_net.py +++ b/rllib/models/tf/attention_net.py @@ -17,7 +17,7 @@ from ray.rllib.models.tf.recurrent_net import RecurrentNetwork from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() # TODO(sven): Use RLlib's FCNet instead. diff --git a/rllib/models/tf/fcnet.py b/rllib/models/tf/fcnet.py index 2b13eea18..b016b5a97 100644 --- a/rllib/models/tf/fcnet.py +++ b/rllib/models/tf/fcnet.py @@ -4,7 +4,7 @@ from ray.rllib.models.tf.misc import normc_initializer from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.utils.framework import get_activation_fn, try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class FullyConnectedNetwork(TFModelV2): diff --git a/rllib/models/tf/fcnet_v1.py b/rllib/models/tf/fcnet_v1.py index 54746111f..cc8ed3d59 100644 --- a/rllib/models/tf/fcnet_v1.py +++ b/rllib/models/tf/fcnet_v1.py @@ -4,7 +4,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.deprecation import deprecation_warning from ray.rllib.utils.framework import get_activation_fn, try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() # Deprecated: see as an alternative models/tf.fcnet.py @@ -29,15 +29,15 @@ class FullyConnectedNetwork(Model): activation = get_activation_fn(options.get("fcnet_activation")) if len(inputs.shape) > 2: - inputs = tf.layers.flatten(inputs) + inputs = tf1.layers.flatten(inputs) - with tf.name_scope("fc_net"): + with tf1.name_scope("fc_net"): i = 1 last_layer = inputs for size in hiddens: # skip final linear layer if options.get("no_final_linear") and i == len(hiddens): - output = tf.layers.dense( + output = tf1.layers.dense( last_layer, num_outputs, kernel_initializer=normc_initializer(1.0), @@ -46,7 +46,7 @@ class FullyConnectedNetwork(Model): return output, output label = "fc{}".format(i) - last_layer = tf.layers.dense( + last_layer = tf1.layers.dense( last_layer, size, kernel_initializer=normc_initializer(1.0), @@ -54,7 +54,7 @@ class FullyConnectedNetwork(Model): name=label) i += 1 - output = tf.layers.dense( + output = tf1.layers.dense( last_layer, num_outputs, kernel_initializer=normc_initializer(0.01), diff --git a/rllib/models/tf/layers/gru_gate.py b/rllib/models/tf/layers/gru_gate.py index f738626a8..69dba748c 100644 --- a/rllib/models/tf/layers/gru_gate.py +++ b/rllib/models/tf/layers/gru_gate.py @@ -1,6 +1,6 @@ from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class GRUGate(tf.keras.layers.Layer): diff --git a/rllib/models/tf/layers/multi_head_attention.py b/rllib/models/tf/layers/multi_head_attention.py index ccc461364..04583adaa 100644 --- a/rllib/models/tf/layers/multi_head_attention.py +++ b/rllib/models/tf/layers/multi_head_attention.py @@ -5,7 +5,7 @@ """ from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class MultiHeadAttention(tf.keras.layers.Layer): diff --git a/rllib/models/tf/layers/noisy_layer.py b/rllib/models/tf/layers/noisy_layer.py index 7024c8acd..a204bd222 100644 --- a/rllib/models/tf/layers/noisy_layer.py +++ b/rllib/models/tf/layers/noisy_layer.py @@ -3,7 +3,7 @@ import numpy as np from ray.rllib.utils.framework import get_activation_fn, get_variable, \ try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class NoisyLayer(tf.keras.layers.Layer): diff --git a/rllib/models/tf/layers/relative_multi_head_attention.py b/rllib/models/tf/layers/relative_multi_head_attention.py index eb9d2f9c9..affd48cee 100644 --- a/rllib/models/tf/layers/relative_multi_head_attention.py +++ b/rllib/models/tf/layers/relative_multi_head_attention.py @@ -1,6 +1,6 @@ from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class RelativeMultiHeadAttention(tf.keras.layers.Layer): diff --git a/rllib/models/tf/layers/skip_connection.py b/rllib/models/tf/layers/skip_connection.py index f56c7b9ac..f2f0e1d5f 100644 --- a/rllib/models/tf/layers/skip_connection.py +++ b/rllib/models/tf/layers/skip_connection.py @@ -1,6 +1,6 @@ from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class SkipConnection(tf.keras.layers.Layer): diff --git a/rllib/models/tf/lstm_v1.py b/rllib/models/tf/lstm_v1.py index c1889340f..e7e4f4a20 100644 --- a/rllib/models/tf/lstm_v1.py +++ b/rllib/models/tf/lstm_v1.py @@ -7,7 +7,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.deprecation import deprecation_warning from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() # Deprecated: see as an alternative models/tf/recurrent_net.py @@ -45,7 +45,7 @@ class LSTM(Model): last_layer = add_time_dimension(features, self.seq_lens) # Setup the LSTM cell - lstm = tf.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True) + lstm = tf1.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True) self.state_init = [ np.zeros(lstm.state_size.c, np.float32), np.zeros(lstm.state_size.h, np.float32) @@ -55,15 +55,15 @@ class LSTM(Model): if self.state_in: c_in, h_in = self.state_in else: - c_in = tf.placeholder( + c_in = tf1.placeholder( tf.float32, [None, lstm.state_size.c], name="c") - h_in = tf.placeholder( + h_in = tf1.placeholder( tf.float32, [None, lstm.state_size.h], name="h") self.state_in = [c_in, h_in] # Setup LSTM outputs - state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in) - lstm_out, lstm_state = tf.nn.dynamic_rnn( + state_in = tf1.nn.rnn_cell.LSTMStateTuple(c_in, h_in) + lstm_out, lstm_state = tf1.nn.dynamic_rnn( lstm, last_layer, initial_state=state_in, diff --git a/rllib/models/tf/misc.py b/rllib/models/tf/misc.py index 64034407a..1da1bbb86 100644 --- a/rllib/models/tf/misc.py +++ b/rllib/models/tf/misc.py @@ -1,7 +1,7 @@ import numpy as np from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() def normc_initializer(std=1.0): @@ -24,7 +24,7 @@ def conv2d(x, if dtype is None: dtype = tf.float32 - with tf.variable_scope(name): + with tf1.variable_scope(name): stride_shape = [1, stride[0], stride[1], 1] filter_shape = [ filter_size[0], filter_size[1], @@ -40,24 +40,24 @@ def conv2d(x, # Initialize weights with random weights. w_bound = np.sqrt(6 / (fan_in + fan_out)) - w = tf.get_variable( + w = tf1.get_variable( "W", filter_shape, dtype, - tf.random_uniform_initializer(-w_bound, w_bound), + tf1.random_uniform_initializer(-w_bound, w_bound), collections=collections) - b = tf.get_variable( + b = tf1.get_variable( "b", [1, 1, 1, num_filters], - initializer=tf.constant_initializer(0.0), + initializer=tf1.constant_initializer(0.0), collections=collections) - return tf.nn.conv2d(x, w, stride_shape, pad) + b + return tf1.nn.conv2d(x, w, stride_shape, pad) + b def linear(x, size, name, initializer=None, bias_init=0): - w = tf.get_variable( + w = tf1.get_variable( name + "/w", [x.get_shape()[1], size], initializer=initializer) - b = tf.get_variable( - name + "/b", [size], initializer=tf.constant_initializer(bias_init)) + b = tf1.get_variable( + name + "/b", [size], initializer=tf1.constant_initializer(bias_init)) return tf.matmul(x, w) + b diff --git a/rllib/models/tf/modelv1_compat.py b/rllib/models/tf/modelv1_compat.py index fb90c2bbf..a44deba19 100644 --- a/rllib/models/tf/modelv1_compat.py +++ b/rllib/models/tf/modelv1_compat.py @@ -9,7 +9,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.tf_ops import scope_vars -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -47,7 +47,7 @@ def make_v1_wrapper(legacy_model_cls): # Tracks update ops self._update_ops = None - with tf.variable_scope(self.name) as scope: + with tf1.variable_scope(self.name) as scope: self.variable_scope = scope @override(ModelV2) @@ -58,20 +58,20 @@ def make_v1_wrapper(legacy_model_cls): def __call__(self, input_dict, state, seq_lens): if self.cur_instance: # create a weight-sharing model copy - with tf.variable_scope(self.cur_instance.scope, reuse=True): + with tf1.variable_scope(self.cur_instance.scope, reuse=True): new_instance = self.legacy_model_cls( input_dict, self.obs_space, self.action_space, self.num_outputs, self.model_config, state, seq_lens) else: # create a new model instance - with tf.variable_scope(self.name): + with tf1.variable_scope(self.name): prev_update_ops = set( - tf.get_collection(tf.GraphKeys.UPDATE_OPS)) + tf1.get_collection(tf1.GraphKeys.UPDATE_OPS)) new_instance = self.legacy_model_cls( input_dict, self.obs_space, self.action_space, self.num_outputs, self.model_config, state, seq_lens) self._update_ops = list( - set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - + set(tf1.get_collection(tf1.GraphKeys.UPDATE_OPS)) - prev_update_ops) if len(new_instance.state_init) != len(self.get_initial_state()): raise ValueError( @@ -112,8 +112,9 @@ def make_v1_wrapper(legacy_model_cls): def value_function(self): assert self.cur_instance is not None, "must call forward first" - with tf.variable_scope(self.variable_scope): - with tf.variable_scope("value_function", reuse=tf.AUTO_REUSE): + with tf1.variable_scope(self.variable_scope): + with tf1.variable_scope( + "value_function", reuse=tf1.AUTO_REUSE): # Simple case: sharing the feature layer if self.model_config["vf_share_layers"]: return tf.reshape( diff --git a/rllib/models/tf/recurrent_net.py b/rllib/models/tf/recurrent_net.py index d31389d37..355213800 100644 --- a/rllib/models/tf/recurrent_net.py +++ b/rllib/models/tf/recurrent_net.py @@ -7,7 +7,7 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.utils.annotations import override, DeveloperAPI from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() @DeveloperAPI @@ -160,18 +160,17 @@ class LSTMWrapper(RecurrentNetwork): # Concat. prev-action/reward if required. if self.model_config["lstm_use_prev_action_reward"]: - if self.model_config["lstm_use_prev_action_reward"]: - wrapped_out = tf.concat( - [ - wrapped_out, - tf.reshape( - tf.cast(input_dict[SampleBatch.PREV_ACTIONS], - tf.float32), [-1, self.action_dim]), - tf.reshape( - tf.cast(input_dict[SampleBatch.PREV_REWARDS], - tf.float32), [-1, 1]), - ], - axis=1) + wrapped_out = tf.concat( + [ + wrapped_out, + tf.reshape( + tf.cast(input_dict[SampleBatch.PREV_ACTIONS], + tf.float32), [-1, self.action_dim]), + tf.reshape( + tf.cast(input_dict[SampleBatch.PREV_REWARDS], + tf.float32), [-1, 1]), + ], + axis=1) # Then through our LSTM. input_dict["obs_flat"] = wrapped_out diff --git a/rllib/models/tf/tf_action_dist.py b/rllib/models/tf/tf_action_dist.py index 031c5ff5c..a6e14257a 100644 --- a/rllib/models/tf/tf_action_dist.py +++ b/rllib/models/tf/tf_action_dist.py @@ -9,7 +9,7 @@ from ray.rllib.utils.annotations import override, DeveloperAPI from ray.rllib.utils.framework import try_import_tf, try_import_tfp from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() tfp = try_import_tfp() tree = try_import_tree() @@ -85,7 +85,7 @@ class Categorical(TFActionDistribution): @override(TFActionDistribution) def _build_sample_op(self): - return tf.squeeze(tf.multinomial(self.inputs, 1), axis=1) + return tf.squeeze(tf.random.categorical(self.inputs, 1), axis=1) @staticmethod @override(ActionDistribution) diff --git a/rllib/models/tf/tf_modelv2.py b/rllib/models/tf/tf_modelv2.py index f8b5859ee..94565286f 100644 --- a/rllib/models/tf/tf_modelv2.py +++ b/rllib/models/tf/tf_modelv2.py @@ -2,7 +2,7 @@ from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.utils.annotations import override, PublicAPI from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() @PublicAPI @@ -39,10 +39,10 @@ class TFModelV2(ModelV2): name, framework="tf") self.var_list = [] - if tf.executing_eagerly(): + if tf1.executing_eagerly(): self.graph = None else: - self.graph = tf.get_default_graph() + self.graph = tf1.get_default_graph() def context(self): """Returns a contextmanager for the current TF graph.""" diff --git a/rllib/models/tf/visionnet.py b/rllib/models/tf/visionnet.py index e4fd75a40..97f8bcf5d 100644 --- a/rllib/models/tf/visionnet.py +++ b/rllib/models/tf/visionnet.py @@ -3,7 +3,7 @@ from ray.rllib.models.tf.visionnet_v1 import _get_filter_config from ray.rllib.models.tf.misc import normc_initializer from ray.rllib.utils.framework import get_activation_fn, try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class VisionNetwork(TFModelV2): diff --git a/rllib/models/tf/visionnet_v1.py b/rllib/models/tf/visionnet_v1.py index 539e84e9c..417149402 100644 --- a/rllib/models/tf/visionnet_v1.py +++ b/rllib/models/tf/visionnet_v1.py @@ -4,7 +4,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.deprecation import deprecation_warning from ray.rllib.utils.framework import get_activation_fn, try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() # Deprecated: see as an alternative models/tf.visionnet.py @@ -24,9 +24,9 @@ class VisionNetwork(Model): activation = get_activation_fn(options.get("conv_activation")) - with tf.name_scope("vision_net"): + with tf1.name_scope("vision_net"): for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1): - inputs = tf.layers.conv2d( + inputs = tf1.layers.conv2d( inputs, out_size, kernel, @@ -38,7 +38,7 @@ class VisionNetwork(Model): # skip final linear layer if options.get("no_final_linear"): - fc_out = tf.layers.conv2d( + fc_out = tf1.layers.conv2d( inputs, num_outputs, kernel, @@ -48,7 +48,7 @@ class VisionNetwork(Model): name="fc_out") return flatten(fc_out), flatten(fc_out) - fc1 = tf.layers.conv2d( + fc1 = tf1.layers.conv2d( inputs, out_size, kernel, @@ -56,7 +56,7 @@ class VisionNetwork(Model): activation=activation, padding="valid", name="fc1") - fc2 = tf.layers.conv2d( + fc2 = tf1.layers.conv2d( fc1, num_outputs, [1, 1], activation=None, diff --git a/rllib/offline/input_reader.py b/rllib/offline/input_reader.py index 9fe5f4309..c0eeb11da 100644 --- a/rllib/offline/input_reader.py +++ b/rllib/offline/input_reader.py @@ -6,7 +6,7 @@ from ray.rllib.policy.sample_batch import MultiAgentBatch from ray.rllib.utils.annotations import PublicAPI from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -75,7 +75,7 @@ class InputReader: k: (-1, ) + s[1:] for (k, s) in [(k, batch[k].shape) for k in keys] } - queue = tf.FIFOQueue(capacity=queue_size, dtypes=dtypes, names=keys) + queue = tf1.FIFOQueue(capacity=queue_size, dtypes=dtypes, names=keys) tensors = queue.dequeue() logger.info("Creating TF queue runner for {}".format(self)) @@ -92,12 +92,12 @@ class _QueueRunner(threading.Thread): def __init__(self, input_reader, queue, keys, dtypes): threading.Thread.__init__(self) - self.sess = tf.get_default_session() + self.sess = tf1.get_default_session() self.daemon = True self.input_reader = input_reader self.keys = keys self.queue = queue - self.placeholders = [tf.placeholder(dtype) for dtype in dtypes] + self.placeholders = [tf1.placeholder(dtype) for dtype in dtypes] self.enqueue_op = queue.enqueue(dict(zip(keys, self.placeholders))) def enqueue(self, batch): diff --git a/rllib/offline/json_reader.py b/rllib/offline/json_reader.py index b72d139cc..372349cfd 100644 --- a/rllib/offline/json_reader.py +++ b/rllib/offline/json_reader.py @@ -45,7 +45,7 @@ class JsonReader(InputReader): logger.warning( "Treating input directory as glob pattern: {}".format( inputs)) - if urlparse(inputs).scheme not in ["d", ""]: + if urlparse(inputs).scheme not in ["", "c"]: raise ValueError( "Don't know how to glob over `{}`, ".format(inputs) + "please specify a list of files to read instead.") @@ -123,7 +123,7 @@ class JsonReader(InputReader): def _next_file(self): path = random.choice(self.files) - if urlparse(path).scheme: + if urlparse(path).scheme not in ["", "c"]: if smart_open is None: raise ValueError( "You must install the `smart_open` module to read " diff --git a/rllib/offline/json_writer.py b/rllib/offline/json_writer.py index 47df20329..f9700eb44 100644 --- a/rllib/offline/json_writer.py +++ b/rllib/offline/json_writer.py @@ -42,7 +42,7 @@ class JsonWriter(OutputWriter): self.ioctx = ioctx or IOContext() self.max_file_size = max_file_size self.compress_columns = compress_columns - if urlparse(path).scheme: + if urlparse(path).scheme not in ["", "c"]: self.path_is_uri = True else: path = os.path.abspath(os.path.expanduser(path)) diff --git a/rllib/optimizers/aso_multi_gpu_learner.py b/rllib/optimizers/aso_multi_gpu_learner.py index 1935e78c5..346babf63 100644 --- a/rllib/optimizers/aso_multi_gpu_learner.py +++ b/rllib/optimizers/aso_multi_gpu_learner.py @@ -15,7 +15,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.timer import TimerStat -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -86,14 +86,15 @@ class TFMultiGPULearner(LearnerThread): self.par_opt = [] with self.local_worker.tf_sess.graph.as_default(): with self.local_worker.tf_sess.as_default(): - with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE): + with tf1.variable_scope( + DEFAULT_POLICY_ID, reuse=tf1.AUTO_REUSE): if self.policy._state_inputs: rnn_inputs = self.policy._state_inputs + [ self.policy._seq_lens ] else: rnn_inputs = [] - adam = tf.train.AdamOptimizer(self.lr) + adam = tf1.train.AdamOptimizer(self.lr) for _ in range(num_data_loader_buffers): self.par_opt.append( LocalSyncParallelOptimizer( @@ -105,7 +106,7 @@ class TFMultiGPULearner(LearnerThread): self.policy.copy)) self.sess = self.local_worker.tf_sess - self.sess.run(tf.global_variables_initializer()) + self.sess.run(tf1.global_variables_initializer()) self.idle_optimizers = queue.Queue() self.ready_optimizers = queue.Queue() diff --git a/rllib/optimizers/multi_gpu_impl.py b/rllib/optimizers/multi_gpu_impl.py index 0771bb18b..ac6d475e9 100644 --- a/rllib/optimizers/multi_gpu_impl.py +++ b/rllib/optimizers/multi_gpu_impl.py @@ -5,7 +5,7 @@ from ray.util.debug import log_once from ray.rllib.utils.debug import summarize from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() # Variable scope in which created variables will be placed under TOWER_SCOPE_NAME = "tower" @@ -63,21 +63,21 @@ class LocalSyncParallelOptimizer: self.build_graph = build_graph # First initialize the shared loss network - with tf.name_scope(TOWER_SCOPE_NAME): + with tf1.name_scope(TOWER_SCOPE_NAME): self._shared_loss = build_graph(self.loss_inputs) - shared_ops = tf.get_collection( - tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name) + shared_ops = tf1.get_collection( + tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name) # Then setup the per-device loss graphs that use the shared weights - self._batch_index = tf.placeholder(tf.int32, name="batch_index") + self._batch_index = tf1.placeholder(tf.int32, name="batch_index") # Dynamic batch size, which may be shrunk if there isn't enough data - self._per_device_batch_size = tf.placeholder( + self._per_device_batch_size = tf1.placeholder( tf.int32, name="per_device_batch_size") self._loaded_per_device_batch_size = max_per_device_batch_size # When loading RNN input, we dynamically determine the max seq len - self._max_seq_len = tf.placeholder(tf.int32, name="max_seq_len") + self._max_seq_len = tf1.placeholder(tf.int32, name="max_seq_len") self._loaded_max_seq_len = 1 # Split on the CPU in case the data doesn't fit in GPU memory. @@ -103,15 +103,15 @@ class LocalSyncParallelOptimizer: # gather update ops for any batch norm layers. TODO(ekl) here we will # use all the ops found which won't work for DQN / DDPG, but those # aren't supported with multi-gpu right now anyways. - self._update_ops = tf.get_collection( - tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name) + self._update_ops = tf1.get_collection( + tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name) for op in shared_ops: self._update_ops.remove(op) # only care about tower update ops if self._update_ops: logger.debug("Update ops to run on apply gradient: {}".format( self._update_ops)) - with tf.control_dependencies(self._update_ops): + with tf1.control_dependencies(self._update_ops): self._train_op = self.optimizer.apply_gradients(avg) def load_data(self, sess, inputs, state_inputs): @@ -265,11 +265,11 @@ class LocalSyncParallelOptimizer: def _setup_device(self, device, device_input_placeholders, num_data_in): assert num_data_in <= len(device_input_placeholders) with tf.device(device): - with tf.name_scope(TOWER_SCOPE_NAME): + with tf1.name_scope(TOWER_SCOPE_NAME): device_input_batches = [] device_input_slices = [] for i, ph in enumerate(device_input_placeholders): - current_batch = tf.Variable( + current_batch = tf1.Variable( ph, trainable=False, validate_shape=False, diff --git a/rllib/optimizers/multi_gpu_optimizer.py b/rllib/optimizers/multi_gpu_optimizer.py index 20883ff83..8ff0c2370 100644 --- a/rllib/optimizers/multi_gpu_optimizer.py +++ b/rllib/optimizers/multi_gpu_optimizer.py @@ -16,7 +16,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.sgd import averaged from ray.rllib.utils.timer import TimerStat -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -115,7 +115,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer): with self.workers.local_worker().tf_sess.graph.as_default(): with self.workers.local_worker().tf_sess.as_default(): for policy_id, policy in self.policies.items(): - with tf.variable_scope(policy_id, reuse=tf.AUTO_REUSE): + with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE): if policy._state_inputs: rnn_inputs = policy._state_inputs + [ policy._seq_lens @@ -130,7 +130,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer): self.per_device_batch_size, policy.copy)) self.sess = self.workers.local_worker().tf_sess - self.sess.run(tf.global_variables_initializer()) + self.sess.run(tf1.global_variables_initializer()) @override(PolicyOptimizer) def step(self): diff --git a/rllib/optimizers/tests/test_optimizers.py b/rllib/optimizers/tests/test_optimizers.py index 35ff838de..0c145757c 100644 --- a/rllib/optimizers/tests/test_optimizers.py +++ b/rllib/optimizers/tests/test_optimizers.py @@ -14,7 +14,7 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.tests.mock_worker import _MockWorker from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class LRScheduleTest(unittest.TestCase): @@ -250,7 +250,7 @@ class AsyncSamplesOptimizerTest(unittest.TestCase): def _make_envs(self): def make_sess(): - return tf.Session(config=tf.ConfigProto(device_count={"CPU": 2})) + return tf1.Session(config=tf1.ConfigProto(device_count={"CPU": 2})) local = RolloutWorker( env_creator=lambda _: gym.make("CartPole-v0"), diff --git a/rllib/policy/dynamic_tf_policy.py b/rllib/policy/dynamic_tf_policy.py index d8502e91b..68fde7339 100644 --- a/rllib/policy/dynamic_tf_policy.py +++ b/rllib/policy/dynamic_tf_policy.py @@ -14,7 +14,7 @@ from ray.rllib.utils.debug import summarize from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.tracking_dict import UsageTrackingDict -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -116,7 +116,7 @@ class DynamicTFPolicy(TFPolicy): explore = existing_inputs["is_exploring"] timestep = existing_inputs["timestep"] else: - obs = tf.placeholder( + obs = tf1.placeholder( tf.float32, shape=[None] + list(obs_space.shape), name="observation") @@ -124,11 +124,11 @@ class DynamicTFPolicy(TFPolicy): if self._obs_include_prev_action_reward: prev_actions = ModelCatalog.get_action_placeholder( action_space, "prev_action") - prev_rewards = tf.placeholder( + prev_rewards = tf1.placeholder( tf.float32, [None], name="prev_reward") - explore = tf.placeholder_with_default( + explore = tf1.placeholder_with_default( True, (), name="is_exploring") - timestep = tf.placeholder(tf.int32, (), name="timestep") + timestep = tf1.placeholder(tf.int32, (), name="timestep") self._input_dict = { SampleBatch.CUR_OBS: obs, @@ -137,7 +137,7 @@ class DynamicTFPolicy(TFPolicy): "is_training": self._get_is_training_placeholder(), } # Placeholder for RNN time-chunk valid lengths. - self._seq_lens = tf.placeholder( + self._seq_lens = tf1.placeholder( dtype=tf.int32, shape=[None], name="seq_lens") dist_class = dist_inputs = None @@ -176,7 +176,7 @@ class DynamicTFPolicy(TFPolicy): self._seq_lens = existing_inputs["seq_lens"] else: self._state_in = [ - tf.placeholder(shape=(None, ) + s.shape, dtype=s.dtype) + tf1.placeholder(shape=(None, ) + s.shape, dtype=s.dtype) for s in self.model.get_initial_state() ] @@ -223,7 +223,7 @@ class DynamicTFPolicy(TFPolicy): explore=explore) # Phase 1 init. - sess = tf.get_default_session() or tf.Session() + sess = tf1.get_default_session() or tf1.Session() if get_batch_divisibility_req: batch_divisibility_req = get_batch_divisibility_req(self) else: @@ -343,7 +343,7 @@ class DynamicTFPolicy(TFPolicy): dummy_batch[k] = fake_array(v) # postprocessing might depend on variable init, so run it first here - self._sess.run(tf.global_variables_initializer()) + self._sess.run(tf1.global_variables_initializer()) postprocessed_batch = self.postprocess_trajectory( SampleBatch(dummy_batch)) @@ -380,7 +380,7 @@ class DynamicTFPolicy(TFPolicy): continue shape = (None, ) + v.shape[1:] dtype = np.float32 if v.dtype == np.float64 else v.dtype - placeholder = tf.placeholder(dtype, shape=shape, name=k) + placeholder = tf1.placeholder(dtype, shape=shape, name=k) train_batch[k] = placeholder for i, si in enumerate(self._state_in): @@ -402,7 +402,7 @@ class DynamicTFPolicy(TFPolicy): if self._grad_stats_fn: self._stats_fetches.update( self._grad_stats_fn(self, train_batch, self._grads)) - self._sess.run(tf.global_variables_initializer()) + self._sess.run(tf1.global_variables_initializer()) def _do_loss_init(self, train_batch): loss = self._loss_fn(self, self.model, self.dist_class, train_batch) diff --git a/rllib/policy/eager_tf_policy.py b/rllib/policy/eager_tf_policy.py index 767f84750..9d0f3377b 100644 --- a/rllib/policy/eager_tf_policy.py +++ b/rllib/policy/eager_tf_policy.py @@ -16,7 +16,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.spaces.space_utils import flatten_to_single_ndarray -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -239,7 +239,7 @@ def build_eager_tf_policy(name, ) self.exploration = self._create_exploration() self._state_in = [ - tf.convert_to_tensor(np.array([s])) + tf.convert_to_tensor([s]) for s in self.model.get_initial_state() ] input_dict = { @@ -266,7 +266,7 @@ def build_eager_tf_policy(name, if optimizer_fn: self._optimizer = optimizer_fn(self, config) else: - self._optimizer = tf.train.AdamOptimizer(config["lr"]) + self._optimizer = tf1.train.AdamOptimizer(config["lr"]) if after_init: after_init(self, observation_space, action_space, config) @@ -618,8 +618,7 @@ def build_eager_tf_policy(name, SampleBatch.DONES: np.array([False], dtype=np.bool), SampleBatch.REWARDS: np.array([0], dtype=np.float32), } - if isinstance(self.action_space, Tuple) or isinstance( - self.action_space, Dict): + if isinstance(self.action_space, (Dict, Tuple)): dummy_batch[SampleBatch.ACTIONS] = [ flatten_to_single_ndarray(self.action_space.sample()) ] @@ -640,7 +639,7 @@ def build_eager_tf_policy(name, dummy_batch["seq_lens"] = np.array([1], dtype=np.int32) # Convert everything to tensors. - dummy_batch = tf.nest.map_structure(tf.convert_to_tensor, + dummy_batch = tf.nest.map_structure(tf1.convert_to_tensor, dummy_batch) # for IMPALA which expects a certain sample batch size. diff --git a/rllib/policy/rnn_sequencing.py b/rllib/policy/rnn_sequencing.py index d38ec9158..5946938dc 100644 --- a/rllib/policy/rnn_sequencing.py +++ b/rllib/policy/rnn_sequencing.py @@ -20,7 +20,7 @@ from ray.rllib.utils.annotations import DeveloperAPI from ray.rllib.utils.debug import summarize from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() logger = logging.getLogger(__name__) @@ -203,7 +203,7 @@ def chop_into_sequences(episode_ids, seq_len = 0 unique_ids = np.add( np.add(episode_ids, agent_indices), - np.array(unroll_ids) << 32) + np.array(unroll_ids, dtype=np.int64) << 32) for uid in unique_ids: if (prev_id is not None and uid != prev_id) or \ seq_len >= max_seq_len: diff --git a/rllib/policy/tests/test_compute_log_likelihoods.py b/rllib/policy/tests/test_compute_log_likelihoods.py index 16554205c..10fa7d705 100644 --- a/rllib/policy/tests/test_compute_log_likelihoods.py +++ b/rllib/policy/tests/test_compute_log_likelihoods.py @@ -11,7 +11,7 @@ from ray.rllib.utils.test_utils import check, framework_iterator from ray.rllib.utils.numpy import one_hot, fc, MIN_LOG_NN_OUTPUT, \ MAX_LOG_NN_OUTPUT -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() def do_test_log_likelihood(run, diff --git a/rllib/policy/tf_policy.py b/rllib/policy/tf_policy.py index d3b00a2d8..6b4243190 100644 --- a/rllib/policy/tf_policy.py +++ b/rllib/policy/tf_policy.py @@ -16,7 +16,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.schedules import ConstantSchedule, PiecewiseSchedule from ray.rllib.utils.tf_run_builder import TFRunBuilder -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -131,9 +131,9 @@ class TFPolicy(Policy): self._sampled_action = sampled_action self._is_training = self._get_is_training_placeholder() self._is_exploring = explore if explore is not None else \ - tf.placeholder_with_default(True, (), name="is_exploring") + tf1.placeholder_with_default(True, (), name="is_exploring") self._sampled_action_logp = sampled_action_logp - self._sampled_action_prob = (tf.exp(self._sampled_action_logp) + self._sampled_action_prob = (tf.math.exp(self._sampled_action_logp) if self._sampled_action_logp is not None else None) self._action_input = action_input # For logp calculations. @@ -162,7 +162,7 @@ class TFPolicy(Policy): self._apply_op = None self._stats_fetches = {} self._timestep = timestep if timestep is not None else \ - tf.placeholder(tf.int32, (), name="timestep") + tf1.placeholder(tf.int32, (), name="timestep") self._optimizer = None self._grads_and_vars = None @@ -248,12 +248,12 @@ class TFPolicy(Policy): # gather update ops for any batch norm layers if not self._update_ops: - self._update_ops = tf.get_collection( - tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name) + self._update_ops = tf1.get_collection( + tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name) if self._update_ops: logger.info("Update ops to run on apply gradient: {}".format( self._update_ops)) - with tf.control_dependencies(self._update_ops): + with tf1.control_dependencies(self._update_ops): self._apply_op = self.build_apply_op(self._optimizer, self._grads_and_vars) @@ -262,7 +262,7 @@ class TFPolicy(Policy): "These tensors were used in the loss_fn:\n\n{}\n".format( summarize(self._loss_input_dict))) - self._sess.run(tf.global_variables_initializer()) + self._sess.run(tf1.global_variables_initializer()) self._optimizer_variables = None if self._optimizer: self._optimizer_variables = \ @@ -397,12 +397,12 @@ class TFPolicy(Policy): def export_model(self, export_dir): """Export tensorflow graph to export_dir for serving.""" with self._sess.graph.as_default(): - builder = tf.saved_model.builder.SavedModelBuilder(export_dir) + builder = tf1.saved_model.builder.SavedModelBuilder(export_dir) signature_def_map = self._build_signature_def() builder.add_meta_graph_and_variables( - self._sess, [tf.saved_model.tag_constants.SERVING], + self._sess, [tf1.saved_model.tag_constants.SERVING], signature_def_map=signature_def_map, - saver=tf.summary.FileWriter(export_dir).add_graph( + saver=tf1.summary.FileWriter(export_dir).add_graph( graph=self._sess.graph)) builder.save() @@ -417,7 +417,7 @@ class TFPolicy(Policy): raise save_path = os.path.join(export_dir, filename_prefix) with self._sess.graph.as_default(): - saver = tf.train.Saver() + saver = tf1.train.Saver() saver.save(self._sess, save_path) @override(Policy) @@ -479,9 +479,9 @@ class TFPolicy(Policy): def optimizer(self): """TF optimizer to use for policy optimization.""" if hasattr(self, "config"): - return tf.train.AdamOptimizer(learning_rate=self.config["lr"]) + return tf1.train.AdamOptimizer(learning_rate=self.config["lr"]) else: - return tf.train.AdamOptimizer() + return tf1.train.AdamOptimizer() @DeveloperAPI def gradients(self, optimizer, loss): @@ -495,7 +495,7 @@ class TFPolicy(Policy): # specify global_step for TD3 which needs to count the num updates return optimizer.apply_gradients( self._grads_and_vars, - global_step=tf.train.get_or_create_global_step()) + global_step=tf1.train.get_or_create_global_step()) @DeveloperAPI def _get_is_training_placeholder(self): @@ -504,7 +504,7 @@ class TFPolicy(Policy): This can be called safely before __init__ has run. """ if not hasattr(self, "_is_training"): - self._is_training = tf.placeholder_with_default( + self._is_training = tf1.placeholder_with_default( False, (), name="is_training") return self._is_training @@ -519,7 +519,7 @@ class TFPolicy(Policy): """ feed_dict = self.extra_compute_action_feed_dict() return { - k.name: tf.saved_model.utils.build_tensor_info(k) + k.name: tf1.saved_model.utils.build_tensor_info(k) for k in feed_dict.keys() } @@ -529,7 +529,7 @@ class TFPolicy(Policy): """ fetches = self.extra_compute_action_fetches() return { - k: tf.saved_model.utils.build_tensor_info(fetches[k]) + k: tf1.saved_model.utils.build_tensor_info(fetches[k]) for k in fetches.keys() } @@ -539,38 +539,40 @@ class TFPolicy(Policy): # build input signatures input_signature = self._extra_input_signature_def() input_signature["observations"] = \ - tf.saved_model.utils.build_tensor_info(self._obs_input) + tf1.saved_model.utils.build_tensor_info(self._obs_input) if self._seq_lens is not None: input_signature["seq_lens"] = \ - tf.saved_model.utils.build_tensor_info(self._seq_lens) + tf1.saved_model.utils.build_tensor_info(self._seq_lens) if self._prev_action_input is not None: input_signature["prev_action"] = \ - tf.saved_model.utils.build_tensor_info(self._prev_action_input) + tf1.saved_model.utils.build_tensor_info( + self._prev_action_input) if self._prev_reward_input is not None: input_signature["prev_reward"] = \ - tf.saved_model.utils.build_tensor_info(self._prev_reward_input) + tf1.saved_model.utils.build_tensor_info( + self._prev_reward_input) input_signature["is_training"] = \ - tf.saved_model.utils.build_tensor_info(self._is_training) + tf1.saved_model.utils.build_tensor_info(self._is_training) for state_input in self._state_inputs: input_signature[state_input.name] = \ - tf.saved_model.utils.build_tensor_info(state_input) + tf1.saved_model.utils.build_tensor_info(state_input) # build output signatures output_signature = self._extra_output_signature_def() for i, a in enumerate(tf.nest.flatten(self._sampled_action)): output_signature["actions_{}".format(i)] = \ - tf.saved_model.utils.build_tensor_info(a) + tf1.saved_model.utils.build_tensor_info(a) for state_output in self._state_outputs: output_signature[state_output.name] = \ - tf.saved_model.utils.build_tensor_info(state_output) + tf1.saved_model.utils.build_tensor_info(state_output) signature_def = ( - tf.saved_model.signature_def_utils.build_signature_def( + tf1.saved_model.signature_def_utils.build_signature_def( input_signature, output_signature, - tf.saved_model.signature_constants.PREDICT_METHOD_NAME)) - signature_def_key = (tf.saved_model.signature_constants. + tf1.saved_model.signature_constants.PREDICT_METHOD_NAME)) + signature_def_key = (tf1.saved_model.signature_constants. DEFAULT_SERVING_SIGNATURE_DEF_KEY) signature_def_map = {signature_def_key: signature_def} return signature_def_map @@ -708,7 +710,7 @@ class LearningRateSchedule: @DeveloperAPI def __init__(self, lr, lr_schedule): - self.cur_lr = tf.get_variable("lr", initializer=lr, trainable=False) + self.cur_lr = tf1.get_variable("lr", initializer=lr, trainable=False) if lr_schedule is None: self.lr_schedule = ConstantSchedule(lr, framework=None) else: @@ -724,7 +726,7 @@ class LearningRateSchedule: @override(TFPolicy) def optimizer(self): - return tf.train.AdamOptimizer(learning_rate=self.cur_lr) + return tf1.train.AdamOptimizer(learning_rate=self.cur_lr) @DeveloperAPI @@ -733,7 +735,7 @@ class EntropyCoeffSchedule: @DeveloperAPI def __init__(self, entropy_coeff, entropy_coeff_schedule): - self.entropy_coeff = tf.get_variable( + self.entropy_coeff = tf1.get_variable( "entropy_coeff", initializer=entropy_coeff, trainable=False) if entropy_coeff_schedule is None: diff --git a/rllib/policy/tf_policy_template.py b/rllib/policy/tf_policy_template.py index 5c1f51f03..c355e6f4d 100644 --- a/rllib/policy/tf_policy_template.py +++ b/rllib/policy/tf_policy_template.py @@ -4,9 +4,6 @@ from ray.rllib.policy.policy import Policy, LEARNER_STATS_KEY from ray.rllib.policy.tf_policy import TFPolicy from ray.rllib.utils import add_mixins from ray.rllib.utils.annotations import override, DeveloperAPI -from ray.rllib.utils.framework import try_import_tf - -tf = try_import_tf() @DeveloperAPI diff --git a/rllib/tests/test_catalog.py b/rllib/tests/test_catalog.py index 9c2c1f3da..32bfec194 100644 --- a/rllib/tests/test_catalog.py +++ b/rllib/tests/test_catalog.py @@ -14,7 +14,7 @@ from ray.rllib.models.tf.visionnet import VisionNetwork from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class CustomPreprocessor(Preprocessor): @@ -54,7 +54,7 @@ class CustomActionDistribution(TFActionDistribution): @override(TFActionDistribution) def _build_sample_op(self): - return tf.random_uniform(self.output_shape) + return tf.random.uniform(self.output_shape) @override(ActionDistribution) def logp(self, x): @@ -102,15 +102,15 @@ class ModelCatalogTest(unittest.TestCase): def test_default_models(self): ray.init(object_store_memory=1000 * 1024 * 1024) - with tf.variable_scope("test1"): + with tf1.variable_scope("test1"): p1 = ModelCatalog.get_model_v2( - obs_space=Box(0, 1, shape=(3, ), dtype=np.float32), + obs_space=Box(0, 1, shape=(3,), dtype=np.float32), action_space=Discrete(5), num_outputs=5, model_config={}) self.assertEqual(type(p1), FullyConnectedNetwork) - with tf.variable_scope("test2"): + with tf1.variable_scope("test2"): p2 = ModelCatalog.get_model_v2( obs_space=Box(0, 1, shape=(84, 84, 3), dtype=np.float32), action_space=Discrete(5), @@ -149,7 +149,7 @@ class ModelCatalogTest(unittest.TestCase): self.assertEqual(param_shape, action_space.shape) # test the class works as a distribution - dist_input = tf.placeholder(tf.float32, (None, ) + param_shape) + dist_input = tf1.placeholder(tf.float32, (None,) + param_shape) model = Model() model.model_config = model_config dist = dist_cls(dist_input, model=model) @@ -163,7 +163,7 @@ class ModelCatalogTest(unittest.TestCase): dist_cls, param_shape = ModelCatalog.get_action_dist( action_space, model_config) self.assertEqual(param_shape, (3, )) - dist_input = tf.placeholder(tf.float32, (None, ) + param_shape) + dist_input = tf1.placeholder(tf.float32, (None,) + param_shape) model.model_config = model_config dist = dist_cls(dist_input, model=model) self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:]) diff --git a/rllib/tests/test_model_imports.py b/rllib/tests/test_model_imports.py index cc800c9f3..cf9aa8519 100644 --- a/rllib/tests/test_model_imports.py +++ b/rllib/tests/test_model_imports.py @@ -14,7 +14,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.test_utils import check, framework_iterator -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, nn = try_import_torch() diff --git a/rllib/tests/test_nested_observation_spaces.py b/rllib/tests/test_nested_observation_spaces.py index dabc85be1..c9e1607d4 100644 --- a/rllib/tests/test_nested_observation_spaces.py +++ b/rllib/tests/test_nested_observation_spaces.py @@ -22,7 +22,7 @@ from ray.tune.registry import register_env from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.spaces.repeated import Repeated -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() _, nn = try_import_torch() DICT_SPACE = spaces.Dict({ @@ -241,9 +241,9 @@ class DictSpyModel(TFModelV2): pickle.dumps((pos, front_cam, task)), overwrite=True) DictSpyModel.capture_index += 1 - return 0 + return np.array(0, dtype=np.int64) - spy_fn = tf.py_func( + spy_fn = tf1.py_func( spy, [ input_dict["obs"]["sensors"]["position"], input_dict["obs"]["sensors"]["front_cam"][0], @@ -252,9 +252,9 @@ class DictSpyModel(TFModelV2): tf.int64, stateful=True) - with tf.control_dependencies([spy_fn]): - output = tf.layers.dense(input_dict["obs"]["sensors"]["position"], - self.num_outputs) + with tf1.control_dependencies([spy_fn]): + output = tf1.layers.dense(input_dict["obs"]["sensors"]["position"], + self.num_outputs) return output, [] @@ -270,9 +270,9 @@ class TupleSpyModel(TFModelV2): pickle.dumps((pos, cam, task)), overwrite=True) TupleSpyModel.capture_index += 1 - return 0 + return np.array(0, dtype=np.int64) - spy_fn = tf.py_func( + spy_fn = tf1.py_func( spy, [ input_dict["obs"][0], input_dict["obs"][1][0], @@ -281,8 +281,8 @@ class TupleSpyModel(TFModelV2): tf.int64, stateful=True) - with tf.control_dependencies([spy_fn]): - output = tf.layers.dense(input_dict["obs"][0], self.num_outputs) + with tf1.control_dependencies([spy_fn]): + output = tf1.layers.dense(input_dict["obs"][0], self.num_outputs) return output, [] diff --git a/rllib/train.py b/rllib/train.py index c7885f5a0..691ed8fd2 100755 --- a/rllib/train.py +++ b/rllib/train.py @@ -14,7 +14,7 @@ from ray.tune.tune import _make_scheduler, run_experiments from ray.rllib.utils.framework import try_import_tf, try_import_torch # Try to import both backends for flag checking/warnings. -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() EXAMPLE_USAGE = """ diff --git a/rllib/utils/exploration/epsilon_greedy.py b/rllib/utils/exploration/epsilon_greedy.py index 26abd766a..75b17215e 100644 --- a/rllib/utils/exploration/epsilon_greedy.py +++ b/rllib/utils/exploration/epsilon_greedy.py @@ -9,7 +9,7 @@ from ray.rllib.utils.from_config import from_config from ray.rllib.utils.numpy import LARGE_INTEGER from ray.rllib.utils.schedules import Schedule, PiecewiseSchedule -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() @@ -97,12 +97,11 @@ class EpsilonGreedy(Exploration): tf.equal(q_values, tf.float32.min), tf.ones_like(q_values) * tf.float32.min, tf.ones_like(q_values)) random_actions = tf.squeeze( - tf.multinomial(random_valid_action_logits, 1), axis=1) + tf.random.categorical(random_valid_action_logits, 1), axis=1) - chose_random = tf.random_uniform( + chose_random = tf.random.uniform( tf.stack([batch_size]), - minval=0, maxval=1, dtype=tf.float32) \ - < epsilon + minval=0, maxval=1, dtype=tf.float32) < epsilon action = tf.cond( pred=tf.constant(explore, dtype=tf.bool) @@ -112,8 +111,8 @@ class EpsilonGreedy(Exploration): ), false_fn=lambda: exploit_action) - assign_op = tf.assign(self.last_timestep, timestep) - with tf.control_dependencies([assign_op]): + assign_op = tf1.assign(self.last_timestep, timestep) + with tf1.control_dependencies([assign_op]): return action, tf.zeros_like(action, dtype=tf.float32) def _get_torch_exploration_action(self, q_values, explore, timestep): diff --git a/rllib/utils/exploration/gaussian_noise.py b/rllib/utils/exploration/gaussian_noise.py index 16554c927..34ebba45d 100644 --- a/rllib/utils/exploration/gaussian_noise.py +++ b/rllib/utils/exploration/gaussian_noise.py @@ -9,7 +9,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch, \ get_variable, TensorType from ray.rllib.utils.schedules.piecewise_schedule import PiecewiseSchedule -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() @@ -96,7 +96,7 @@ class GaussianNoise(Exploration): deterministic_actions = action_dist.deterministic_sample() # Take a Gaussian sample with our stddev (mean=0.0) and scale it. - gaussian_sample = self.scale_schedule(ts) * tf.random_normal( + gaussian_sample = self.scale_schedule(ts) * tf.random.normal( tf.shape(deterministic_actions), stddev=self.stddev) # Stochastic actions could either be: random OR action + noise. @@ -120,13 +120,13 @@ class GaussianNoise(Exploration): true_fn=lambda: stochastic_actions, false_fn=lambda: deterministic_actions) # Logp=always zero. - logp = tf.zeros(shape=(batch_size, ), dtype=tf.float32) + logp = tf.zeros(shape=(batch_size,), dtype=tf.float32) # Increment `last_timestep` by 1 (or set to `timestep`). - assign_op = \ - tf.assign_add(self.last_timestep, 1) if timestep is None else \ - tf.assign(self.last_timestep, timestep) - with tf.control_dependencies([assign_op]): + assign_op = ( + tf1.assign_add(self.last_timestep, 1) if timestep is None else + tf1.assign(self.last_timestep, timestep)) + with tf1.control_dependencies([assign_op]): return action, logp def _get_torch_exploration_action(self, action_dist, explore, timestep): diff --git a/rllib/utils/exploration/ornstein_uhlenbeck_noise.py b/rllib/utils/exploration/ornstein_uhlenbeck_noise.py index 72ace558c..7b0f98ea8 100644 --- a/rllib/utils/exploration/ornstein_uhlenbeck_noise.py +++ b/rllib/utils/exploration/ornstein_uhlenbeck_noise.py @@ -5,7 +5,7 @@ from ray.rllib.utils.exploration.gaussian_noise import GaussianNoise from ray.rllib.utils.framework import try_import_tf, try_import_torch, \ get_variable -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() @@ -91,11 +91,11 @@ class OrnsteinUhlenbeckNoise(GaussianNoise): # Apply base-scaled and time-annealed scaled OU-noise to # deterministic actions. - gaussian_sample = tf.random_normal( + gaussian_sample = tf.random.normal( shape=[self.action_space.low.size], stddev=self.stddev) ou_new = self.ou_theta * -self.ou_state + \ self.ou_sigma * gaussian_sample - ou_state_new = tf.assign_add(self.ou_state, ou_new) + ou_state_new = tf1.assign_add(self.ou_state, ou_new) high_m_low = self.action_space.high - self.action_space.low high_m_low = tf.where( tf.math.is_inf(high_m_low), tf.ones_like(high_m_low), high_m_low) @@ -122,13 +122,13 @@ class OrnsteinUhlenbeckNoise(GaussianNoise): false_fn=lambda: deterministic_actions) # Logp=always zero. batch_size = tf.shape(deterministic_actions)[0] - logp = tf.zeros(shape=(batch_size, ), dtype=tf.float32) + logp = tf.zeros(shape=(batch_size,), dtype=tf.float32) # Increment `last_timestep` by 1 (or set to `timestep`). - assign_op = \ - tf.assign_add(self.last_timestep, 1) if timestep is None else \ - tf.assign(self.last_timestep, timestep) - with tf.control_dependencies([assign_op, ou_state_new]): + assign_op = ( + tf1.assign_add(self.last_timestep, 1) if timestep is None else + tf1.assign(self.last_timestep, timestep)) + with tf1.control_dependencies([assign_op, ou_state_new]): return action, logp @override(GaussianNoise) diff --git a/rllib/utils/exploration/parameter_noise.py b/rllib/utils/exploration/parameter_noise.py index 6654bd829..abf59f188 100644 --- a/rllib/utils/exploration/parameter_noise.py +++ b/rllib/utils/exploration/parameter_noise.py @@ -13,7 +13,7 @@ from ray.rllib.utils.framework import get_variable from ray.rllib.utils.from_config import from_config from ray.rllib.utils.numpy import softmax, SMALL_NUMBER -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() @@ -91,9 +91,9 @@ class ParameterNoise(Exploration): self.tf_remove_noise_op = \ self._tf_remove_noise_op() # Create convenience sample+add op for tf. - with tf.control_dependencies([self.tf_sample_new_noise_op]): + with tf1.control_dependencies([self.tf_sample_new_noise_op]): add_op = self._tf_add_stored_noise_op() - with tf.control_dependencies([add_op]): + with tf1.control_dependencies([add_op]): self.tf_sample_new_noise_and_add_op = tf.no_op() # Whether the Model's weights currently have noise added or not. @@ -303,7 +303,7 @@ class ParameterNoise(Exploration): added_noises = [] for noise in self.noise: added_noises.append( - tf.assign( + tf1.assign( noise, tf.random.normal( shape=noise.shape, @@ -361,9 +361,9 @@ class ParameterNoise(Exploration): """ add_noise_ops = list() for var, noise in zip(self.model_variables, self.noise): - add_noise_ops.append(tf.assign_add(var, noise)) + add_noise_ops.append(tf1.assign_add(var, noise)) ret = tf.group(*tuple(add_noise_ops)) - with tf.control_dependencies([ret]): + with tf1.control_dependencies([ret]): return tf.no_op() def _remove_noise(self, *, tf_sess=None): @@ -400,9 +400,9 @@ class ParameterNoise(Exploration): """ remove_noise_ops = list() for var, noise in zip(self.model_variables, self.noise): - remove_noise_ops.append(tf.assign_add(var, -noise)) + remove_noise_ops.append(tf1.assign_add(var, -noise)) ret = tf.group(*tuple(remove_noise_ops)) - with tf.control_dependencies([ret]): + with tf1.control_dependencies([ret]): return tf.no_op() @override(Exploration) diff --git a/rllib/utils/exploration/random.py b/rllib/utils/exploration/random.py index 581883320..935848bbf 100644 --- a/rllib/utils/exploration/random.py +++ b/rllib/utils/exploration/random.py @@ -11,7 +11,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch, \ TensorType from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() diff --git a/rllib/utils/exploration/stochastic_sampling.py b/rllib/utils/exploration/stochastic_sampling.py index a6431745b..f5cd7b003 100644 --- a/rllib/utils/exploration/stochastic_sampling.py +++ b/rllib/utils/exploration/stochastic_sampling.py @@ -8,7 +8,7 @@ from ray.rllib.utils.exploration.exploration import Exploration from ray.rllib.utils.framework import try_import_tf, try_import_torch, \ TensorType -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() tree = try_import_tree() diff --git a/rllib/utils/exploration/tests/test_explorations.py b/rllib/utils/exploration/tests/test_explorations.py index 04bfe71ed..910cb5d5b 100644 --- a/rllib/utils/exploration/tests/test_explorations.py +++ b/rllib/utils/exploration/tests/test_explorations.py @@ -11,9 +11,7 @@ import ray.rllib.agents.impala as impala import ray.rllib.agents.pg as pg import ray.rllib.agents.ppo as ppo import ray.rllib.agents.sac as sac -from ray.rllib.utils import check, framework_iterator, try_import_tf - -tf = try_import_tf() +from ray.rllib.utils import check, framework_iterator def do_test_explorations(run, diff --git a/rllib/utils/exploration/tests/test_parameter_noise.py b/rllib/utils/exploration/tests/test_parameter_noise.py index a77d71bbe..b186bb70f 100644 --- a/rllib/utils/exploration/tests/test_parameter_noise.py +++ b/rllib/utils/exploration/tests/test_parameter_noise.py @@ -3,11 +3,8 @@ import unittest import ray.rllib.agents.ddpg as ddpg import ray.rllib.agents.dqn as dqn -from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.test_utils import check, framework_iterator -tf = try_import_tf() - class TestParameterNoise(unittest.TestCase): def test_ddpg_parameter_noise(self): diff --git a/rllib/utils/framework.py b/rllib/utils/framework.py index 014d4d7d6..b57e4db77 100644 --- a/rllib/utils/framework.py +++ b/rllib/utils/framework.py @@ -21,7 +21,11 @@ def try_import_tf(error=False): error (bool): Whether to raise an error if tf cannot be imported. Returns: - The tf module (either from tf2.0.compat.v1 OR as tf1.x. + Tuple: + - tf1.x module (either from tf2.x.compat.v1 OR as tf1.x). + - tf module (resulting from `import tensorflow`). + Either tf1.x or 2.x. + - The actually installed tf version as int: 1 or 2. Raises: ImportError: If error=True and tf is not installed. @@ -30,7 +34,7 @@ def try_import_tf(error=False): # that uses them: del os.environ["RLLIB_TEST_NO_TF_IMPORT"] if "RLLIB_TEST_NO_TF_IMPORT" in os.environ: logger.warning("Not importing TensorFlow for test purposes") - return None + return None, None, None if "TF_CPP_MIN_LOG_LEVEL" not in os.environ: os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" @@ -38,34 +42,31 @@ def try_import_tf(error=False): # Try to reuse already imported tf module. This will avoid going through # the initial import steps below and thereby switching off v2_behavior # (switching off v2 behavior twice breaks all-framework tests for eager). + was_imported = False if "tensorflow" in sys.modules: tf_module = sys.modules["tensorflow"] - # Try "reducing" tf to tf.compat.v1. - try: - tf_module = tf_module.compat.v1 - # No compat.v1 -> return tf as is. - except AttributeError: - pass - return tf_module + was_imported = True - # Just in case. We should not go through the below twice. - assert "tensorflow" not in sys.modules - - try: - # Try "reducing" tf to tf.compat.v1. - import tensorflow.compat.v1 as tf - tf.logging.set_verbosity(tf.logging.ERROR) - # Disable v2 eager mode. - tf.disable_v2_behavior() - return tf - except ImportError: + else: try: - import tensorflow as tf - return tf + import tensorflow as tf_module except ImportError as e: if error: raise e - return None + return None, None, None + + # Try "reducing" tf to tf.compat.v1. + try: + tf1_module = tf_module.compat.v1 + if not was_imported: + tf1_module.disable_v2_behavior() + # No compat.v1 -> return tf as is. + except AttributeError: + tf1_module = tf_module + + version = 2 if "2." in tf_module.__version__[:2] else 1 + + return tf1_module, tf_module, version def tf_function(tf_module): @@ -221,16 +222,10 @@ def get_activation_fn(name, framework="tf"): else: if name in ["linear", None]: return None - tf = try_import_tf() + tf1, tf, tfv = try_import_tf() fn = getattr(tf.nn, name, None) if fn is not None: return fn raise ValueError("Unknown activation ({}) for framework={}!".format( name, framework)) - - -# This call should never happen inside a module's functions/classes -# as it would re-disable tf-eager. -tf = try_import_tf() -torch, _ = try_import_torch() diff --git a/rllib/utils/numpy.py b/rllib/utils/numpy.py index 650d711f4..0a6f95516 100644 --- a/rllib/utils/numpy.py +++ b/rllib/utils/numpy.py @@ -2,7 +2,7 @@ import numpy as np from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() SMALL_NUMBER = 1e-6 diff --git a/rllib/utils/schedules/piecewise_schedule.py b/rllib/utils/schedules/piecewise_schedule.py index 6c82c30a0..b37fb1839 100644 --- a/rllib/utils/schedules/piecewise_schedule.py +++ b/rllib/utils/schedules/piecewise_schedule.py @@ -2,7 +2,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.schedules.schedule import Schedule -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() def _linear_interpolation(l, r, alpha): diff --git a/rllib/utils/schedules/polynomial_schedule.py b/rllib/utils/schedules/polynomial_schedule.py index f13767358..b6402da80 100644 --- a/rllib/utils/schedules/polynomial_schedule.py +++ b/rllib/utils/schedules/polynomial_schedule.py @@ -1,7 +1,7 @@ from ray.rllib.utils.schedules.schedule import Schedule from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class PolynomialSchedule(Schedule): diff --git a/rllib/utils/schedules/schedule.py b/rllib/utils/schedules/schedule.py index 52a3205c5..316f359fe 100644 --- a/rllib/utils/schedules/schedule.py +++ b/rllib/utils/schedules/schedule.py @@ -3,7 +3,7 @@ from abc import ABCMeta, abstractmethod from ray.rllib.utils.annotations import DeveloperAPI from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() @DeveloperAPI diff --git a/rllib/utils/schedules/tests/test_schedules.py b/rllib/utils/schedules/tests/test_schedules.py index 9576833ce..0fed37092 100644 --- a/rllib/utils/schedules/tests/test_schedules.py +++ b/rllib/utils/schedules/tests/test_schedules.py @@ -5,7 +5,7 @@ from ray.rllib.utils.schedules import ConstantSchedule, \ from ray.rllib.utils import check, framework_iterator, try_import_tf from ray.rllib.utils.from_config import from_config -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() class TestSchedules(unittest.TestCase): diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py index 2a9275028..444381c65 100644 --- a/rllib/utils/test_utils.py +++ b/rllib/utils/test_utils.py @@ -1,10 +1,11 @@ +import gym import logging import numpy as np from ray.rllib.utils.framework import try_import_tf, try_import_torch -tf = try_import_tf() -if tf: +tf1, tf, tfv = try_import_tf() +if tf1: eager_mode = None try: from tensorflow.python.eager.context import eager_mode @@ -60,7 +61,7 @@ def framework_iterator(config=None, # Do we need a test session? sess = None if fw == "tf" and session is True: - sess = tf.Session() + sess = tf1.Session() sess.__enter__() print("framework={}".format(fw)) @@ -71,9 +72,9 @@ def framework_iterator(config=None, if fw == "tfe": eager_ctx = eager_mode() eager_ctx.__enter__() - assert tf.executing_eagerly() + assert tf1.executing_eagerly() elif fw == "tf": - assert not tf.executing_eagerly() + assert not tf1.executing_eagerly() yield fw if session is False else (fw, sess) @@ -165,18 +166,18 @@ def check(x, y, decimals=5, atol=None, rtol=None, false=False): raise e # Everything else (assume numeric or tf/torch.Tensor). else: - if tf is not None: + if tf1 is not None: # y should never be a Tensor (y=expected value). - if isinstance(y, tf.Tensor): + if isinstance(y, tf1.Tensor): raise ValueError("`y` (expected value) must not be a Tensor. " "Use numpy.ndarray instead") - if isinstance(x, tf.Tensor): + if isinstance(x, tf1.Tensor): # In eager mode, numpyize tensors. - if tf.executing_eagerly(): + if tf1.executing_eagerly(): x = x.numpy() # Otherwise, use a quick tf-session. else: - with tf.Session() as sess: + with tf1.Session() as sess: x = sess.run(x) return check( x, @@ -264,12 +265,27 @@ def check_compute_single_action(trainer, except AttributeError: pol = trainer.policy - obs_space = pol.observation_space action_space = pol.action_space for what in [pol, trainer]: - method_to_test = trainer.compute_action if what is trainer else \ - pol.compute_single_action + if what is trainer: + method_to_test = trainer.compute_action + # Get the obs-space from Workers.env (not Policy) due to possible + # pre-processor up front. + worker_set = getattr( + trainer, "workers", getattr(trainer, "_workers", None)) + assert worker_set + if isinstance(worker_set, list): + obs_space = trainer.get_policy().observation_space + try: + obs_space = obs_space.original_space + except AttributeError: + pass + else: + obs_space = worker_set.local_worker().env.observation_space + else: + method_to_test = pol.compute_single_action + obs_space = pol.observation_space for explore in [True, False]: for full_fetch in ([False, True] if what is trainer else [False]): @@ -279,7 +295,9 @@ def check_compute_single_action(trainer, else: call_kwargs["clip_actions"] = True - obs = np.clip(obs_space.sample(), -1.0, 1.0) + obs = obs_space.sample() + if isinstance(obs_space, gym.spaces.Box): + obs = np.clip(obs, -1.0, 1.0) state_in = None if include_state: state_in = pol.model.get_initial_state() diff --git a/rllib/utils/tests/test_framework_agnostic_components.py b/rllib/utils/tests/test_framework_agnostic_components.py index 400c16c40..5db64d7da 100644 --- a/rllib/utils/tests/test_framework_agnostic_components.py +++ b/rllib/utils/tests/test_framework_agnostic_components.py @@ -9,7 +9,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.from_config import from_config from ray.rllib.utils.test_utils import check, framework_iterator -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() torch, _ = try_import_torch() @@ -136,7 +136,7 @@ class TestFrameWorkAgnosticComponents(unittest.TestCase): # Test recognizing default package path. scope = None if sess: - scope = tf.variable_scope("exploration_object") + scope = tf1.variable_scope("exploration_object") scope.__enter__() component = from_config( Exploration, { diff --git a/rllib/utils/tf_ops.py b/rllib/utils/tf_ops.py index abdef2fc8..c6d55fa0f 100644 --- a/rllib/utils/tf_ops.py +++ b/rllib/utils/tf_ops.py @@ -1,6 +1,6 @@ from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() def explained_variance(y, pred): @@ -86,7 +86,7 @@ def make_tf_callable(session_or_none, dynamic_shape=False): else: shape = v.shape placeholders.append( - tf.placeholder( + tf1.placeholder( dtype=v.dtype, shape=shape, name="arg_{}".format(i))) @@ -120,7 +120,7 @@ def scope_vars(scope, trainable_only=False): vars: [tf.Variable] list of variables in `scope`. """ - return tf.get_collection( - tf.GraphKeys.TRAINABLE_VARIABLES - if trainable_only else tf.GraphKeys.VARIABLES, + return tf1.get_collection( + tf1.GraphKeys.TRAINABLE_VARIABLES + if trainable_only else tf1.GraphKeys.VARIABLES, scope=scope if isinstance(scope, str) else scope.name) diff --git a/rllib/utils/tf_run_builder.py b/rllib/utils/tf_run_builder.py index 4d891fbfa..82b904bd1 100644 --- a/rllib/utils/tf_run_builder.py +++ b/rllib/utils/tf_run_builder.py @@ -5,7 +5,7 @@ import time from ray.util.debug import log_once from ray.rllib.utils.framework import try_import_tf -tf = try_import_tf() +tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -63,8 +63,8 @@ def run_timeline(sess, ops, debug_name, feed_dict={}, timeline_dir=None): if timeline_dir: from tensorflow.python.client import timeline - run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - run_metadata = tf.RunMetadata() + run_options = tf1.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) + run_metadata = tf1.RunMetadata() start = time.time() fetches = sess.run( ops, diff --git a/scripts b/scripts deleted file mode 120000 index 8f67c5cc2..000000000 --- a/scripts +++ /dev/null @@ -1 +0,0 @@ -ci/travis \ No newline at end of file