[RLlib] Tf2x preparation; part 2 (upgrading try_import_tf()). (#9136)

* WIP.

* Fixes.

* LINT.

* WIP.

* WIP.

* Fixes.

* Fixes.

* Fixes.

* Fixes.

* WIP.

* Fixes.

* Test

* Fix.

* Fixes and LINT.

* Fixes and LINT.

* LINT.
This commit is contained in:
Sven Mika
2020-06-30 10:13:20 +02:00
committed by GitHub
parent fb074da7c3
commit 43043ee4d5
125 changed files with 617 additions and 584 deletions
+43 -23
View File
@@ -4,7 +4,7 @@ import numpy as np
from ray.rllib.utils import force_list
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
def unflatten(vector, shapes):
@@ -79,24 +79,29 @@ class TensorFlowVariables:
variable_names.append(tf_obj.node_def.name)
self.variables = OrderedDict()
variable_list = [
v for v in tf.global_variables()
v for v in tf1.global_variables()
if v.op.node_def.name in variable_names
]
if input_variables is not None:
variable_list += input_variables
for v in variable_list:
self.variables[v.op.node_def.name] = v
self.placeholders = {}
self.assignment_nodes = {}
if not tf1.executing_eagerly():
for v in variable_list:
self.variables[v.op.node_def.name] = v
# Create new placeholders to put in custom weights.
for k, var in self.variables.items():
self.placeholders[k] = tf.placeholder(
var.value().dtype,
var.get_shape().as_list(),
name="Placeholder_" + k)
self.assignment_nodes[k] = var.assign(self.placeholders[k])
self.placeholders = {}
self.assignment_nodes = {}
# Create new placeholders to put in custom weights.
for k, var in self.variables.items():
self.placeholders[k] = tf1.placeholder(
var.value().dtype,
var.get_shape().as_list(),
name="Placeholder_" + k)
self.assignment_nodes[k] = var.assign(self.placeholders[k])
else:
for v in variable_list:
self.variables[v.name] = v
def set_session(self, sess):
"""Sets the current session used by the class.
@@ -117,10 +122,12 @@ class TensorFlowVariables:
def _check_sess(self):
"""Checks if the session is set, and if not throw an error message."""
assert self.sess is not None, ("The session is not set. Set the "
"session either by passing it into the "
"TensorFlowVariables constructor or by "
"calling set_session(sess).")
if tf1.executing_eagerly():
return
assert self.sess is not None, \
"The session is not set. Set the session either by passing it " \
"into the TensorFlowVariables constructor or by calling " \
"set_session(sess)."
def get_flat(self):
"""Gets the weights and returns them as a flat array.
@@ -129,6 +136,11 @@ class TensorFlowVariables:
1D Array containing the flattened weights.
"""
self._check_sess()
# Eager mode.
if not self.sess:
return np.concatenate(
[v.numpy().flatten() for v in self.variables.values()])
# Graph mode.
return np.concatenate([
v.eval(session=self.sess).flatten()
for v in self.variables.values()
@@ -147,12 +159,16 @@ class TensorFlowVariables:
self._check_sess()
shapes = [v.get_shape().as_list() for v in self.variables.values()]
arrays = unflatten(new_weights, shapes)
placeholders = [
self.placeholders[k] for k, v in self.variables.items()
]
self.sess.run(
list(self.assignment_nodes.values()),
feed_dict=dict(zip(placeholders, arrays)))
if not self.sess:
for v, a in zip(self.variables.values(), arrays):
v.assign(a)
else:
placeholders = [
self.placeholders[k] for k, v in self.variables.items()
]
self.sess.run(
list(self.assignment_nodes.values()),
feed_dict=dict(zip(placeholders, arrays)))
def get_weights(self):
"""Returns a dictionary containing the weights of the network.
@@ -161,6 +177,10 @@ class TensorFlowVariables:
Dictionary mapping variable names to their weights.
"""
self._check_sess()
# Eager mode.
if not self.sess:
return self.variables
# Graph mode.
return self.sess.run(self.variables)
def set_weights(self, new_weights):
+8
View File
@@ -344,6 +344,7 @@ py_test(
args = ["--yaml-dir=tuned_examples/sac", "--torch"]
)
# TD3
py_test(
name = "run_regression_tests_pendulum_td3_tf",
@@ -1013,6 +1014,13 @@ py_test(
srcs = ["models/tests/test_distributions.py"]
)
py_test(
name = "test_attention_nets",
tags = ["models"],
size = "small",
srcs = ["models/tests/test_attention_nets.py"]
)
# --------------------------------------------------------------------
# Optimizers and Memories
# rllib/execution/
+1 -1
View File
@@ -9,7 +9,7 @@ from ray.rllib.policy.tf_policy import LearningRateSchedule
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class A3CLoss:
+4 -4
View File
@@ -13,7 +13,7 @@ from ray.rllib.utils.filter import get_filter
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.spaces.space_utils import unbatch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class ARSTFPolicy:
@@ -29,8 +29,8 @@ class ARSTFPolicy:
self.single_threaded = config.get("single_threaded", False)
self.sess = make_session(single_threaded=self.single_threaded)
self.inputs = tf.placeholder(tf.float32,
[None] + list(self.preprocessor.shape))
self.inputs = tf1.placeholder(tf.float32,
[None] + list(self.preprocessor.shape))
# Policy network.
dist_class, dist_dim = ModelCatalog.get_action_dist(
@@ -52,7 +52,7 @@ class ARSTFPolicy:
self.num_params = sum(
np.prod(variable.shape.as_list())
for _, variable in self.variables.variables.items())
self.sess.run(tf.global_variables_initializer())
self.sess.run(tf1.global_variables_initializer())
def compute_actions(self,
observation,
+1 -1
View File
@@ -3,7 +3,7 @@ import numpy as np
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class DDPGTFModel(TFModelV2):
+20 -20
View File
@@ -22,7 +22,7 @@ from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.tf_ops import huber_loss, minimize_and_clip, \
make_tf_callable
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -126,18 +126,18 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
target_model_out_tp1, _ = policy.target_model(input_dict_next, [], None)
# Policy network evaluation.
with tf.variable_scope(POLICY_SCOPE, reuse=True):
# prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
with tf1.variable_scope(POLICY_SCOPE, reuse=True):
# prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS))
policy_t = model.get_policy_output(model_out_t)
# policy_batchnorm_update_ops = list(
# set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
# set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
with tf.variable_scope(POLICY_TARGET_SCOPE):
with tf1.variable_scope(POLICY_TARGET_SCOPE):
policy_tp1 = \
policy.target_model.get_policy_output(target_model_out_tp1)
# Action outputs.
with tf.variable_scope(ACTION_SCOPE, reuse=True):
with tf1.variable_scope(ACTION_SCOPE, reuse=True):
if policy.config["smooth_target_policy"]:
target_noise_clip = policy.config["target_noise_clip"]
clipped_normal_sample = tf.clip_by_value(
@@ -154,29 +154,29 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
policy_tp1_smoothed = policy_tp1
# Q-net(s) evaluation.
# prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
with tf.variable_scope(Q_SCOPE):
# prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS))
with tf1.variable_scope(Q_SCOPE):
# Q-values for given actions & observations in given current state
q_t = model.get_q_values(model_out_t, train_batch[SampleBatch.ACTIONS])
with tf.variable_scope(Q_SCOPE, reuse=True):
with tf1.variable_scope(Q_SCOPE, reuse=True):
# Q-values for current policy (no noise) in given current state
q_t_det_policy = model.get_q_values(model_out_t, policy_t)
if twin_q:
with tf.variable_scope(TWIN_Q_SCOPE):
with tf1.variable_scope(TWIN_Q_SCOPE):
twin_q_t = model.get_twin_q_values(
model_out_t, train_batch[SampleBatch.ACTIONS])
# q_batchnorm_update_ops = list(
# set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
# set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
# Target q-net(s) evaluation.
with tf.variable_scope(Q_TARGET_SCOPE):
with tf1.variable_scope(Q_TARGET_SCOPE):
q_tp1 = policy.target_model.get_q_values(target_model_out_tp1,
policy_tp1_smoothed)
if twin_q:
with tf.variable_scope(TWIN_Q_TARGET_SCOPE):
with tf1.variable_scope(TWIN_Q_TARGET_SCOPE):
twin_q_tp1 = policy.target_model.get_twin_q_values(
target_model_out_tp1, policy_tp1_smoothed)
@@ -220,10 +220,10 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
if l2_reg is not None:
for var in policy.model.policy_variables():
if "bias" not in var.name:
actor_loss += (l2_reg * tf.nn.l2_loss(var))
actor_loss += (l2_reg * tf1.nn.l2_loss(var))
for var in policy.model.q_variables():
if "bias" not in var.name:
critic_loss += (l2_reg * tf.nn.l2_loss(var))
critic_loss += (l2_reg * tf1.nn.l2_loss(var))
# Model self-supervised losses.
if policy.config["use_state_preprocessor"]:
@@ -259,9 +259,9 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
def make_ddpg_optimizers(policy, config):
# Create separate optimizers for actor & critic losses.
policy._actor_optimizer = tf.train.AdamOptimizer(
policy._actor_optimizer = tf1.train.AdamOptimizer(
learning_rate=config["actor_lr"])
policy._critic_optimizer = tf.train.AdamOptimizer(
policy._critic_optimizer = tf1.train.AdamOptimizer(
learning_rate=config["critic_lr"])
return None
@@ -286,7 +286,7 @@ def build_apply_op(policy, optimizer, grads_and_vars):
# For policy gradient, update the policy net only once for every
# `policy_delay` update(s) of the critic net.
should_apply_actor_opt = tf.equal(
tf.mod(policy.global_step, policy.config["policy_delay"]), 0)
tf.math.floormod(policy.global_step, policy.config["policy_delay"]), 0)
def make_apply_op():
return policy._actor_optimizer.apply_gradients(
@@ -299,7 +299,7 @@ def build_apply_op(policy, optimizer, grads_and_vars):
critic_op = policy._critic_optimizer.apply_gradients(
policy._critic_grads_and_vars)
# Increment global step & apply ops.
with tf.control_dependencies([tf.assign_add(policy.global_step, 1)]):
with tf1.control_dependencies([tf1.assign_add(policy.global_step, 1)]):
return tf.group(actor_op, critic_op)
@@ -341,7 +341,7 @@ def build_ddpg_stats(policy, batch):
def before_init_fn(policy, obs_space, action_space, config):
# Create global step for counting the number of update operations.
policy.global_step = tf.train.get_or_create_global_step()
policy.global_step = tf1.train.get_or_create_global_step()
class ComputeTDErrorMixin:
+4 -4
View File
@@ -49,10 +49,10 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
target_model_out_tp1, _ = policy.target_model(input_dict_next, [], None)
# Policy network evaluation.
# prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
# prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS))
policy_t = model.get_policy_output(model_out_t)
# policy_batchnorm_update_ops = list(
# set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
# set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
policy_tp1 = \
policy.target_model.get_policy_output(target_model_out_tp1)
@@ -73,7 +73,7 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
policy_tp1_smoothed = policy_tp1
# Q-net(s) evaluation.
# prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
# prev_update_ops = set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS))
# Q-values for given actions & observations in given current state
q_t = model.get_q_values(model_out_t, train_batch[SampleBatch.ACTIONS])
@@ -86,7 +86,7 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
twin_q_t = model.get_twin_q_values(model_out_t,
train_batch[SampleBatch.ACTIONS])
# q_batchnorm_update_ops = list(
# set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
# set(tf1.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
# Target q-net(s) evaluation.
q_tp1 = policy.target_model.get_q_values(target_model_out_tp1,
+1 -1
View File
@@ -4,7 +4,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
_, tf, _ = try_import_tf()
class NoopModel(TFModelV2):
+7 -4
View File
@@ -6,7 +6,7 @@ from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check, check_compute_single_action, \
framework_iterator
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class TestTD3(unittest.TestCase):
@@ -32,8 +32,9 @@ class TestTD3(unittest.TestCase):
# Test against all frameworks.
for _ in framework_iterator(config, frameworks="tf"):
lcl_config = config.copy()
# Default GaussianNoise setup.
trainer = td3.TD3Trainer(config=config, env="Pendulum-v0")
trainer = td3.TD3Trainer(config=lcl_config, env="Pendulum-v0")
# Setting explore=False should always return the same action.
a_ = trainer.compute_action(obs, explore=False)
for _ in range(50):
@@ -44,9 +45,10 @@ class TestTD3(unittest.TestCase):
for _ in range(50):
actions.append(trainer.compute_action(obs))
check(np.std(actions), 0.0, false=True)
trainer.stop()
# Check randomness at beginning.
config["exploration_config"] = {
lcl_config["exploration_config"] = {
# Act randomly at beginning ...
"random_timesteps": 30,
# Then act very closely to deterministic actions thereafter.
@@ -54,7 +56,7 @@ class TestTD3(unittest.TestCase):
"initial_scale": 0.001,
"final_scale": 0.001,
}
trainer = td3.TD3Trainer(config=config, env="Pendulum-v0")
trainer = td3.TD3Trainer(config=lcl_config, env="Pendulum-v0")
# ts=1 (get a deterministic action as per explore=False).
deterministic_action = trainer.compute_action(obs, explore=False)
# ts=2-5 (in random window).
@@ -73,6 +75,7 @@ class TestTD3(unittest.TestCase):
for _ in range(50):
a = trainer.compute_action(obs, explore=False)
check(a, deterministic_action)
trainer.stop()
if __name__ == "__main__":
+20 -18
View File
@@ -3,7 +3,7 @@ import numpy as np
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class DistributionalQTFModel(TFModelV2):
@@ -155,7 +155,7 @@ class DistributionalQTFModel(TFModelV2):
units=num_atoms, activation=None)(state_out)
return state_score
if tf.executing_eagerly():
if tf1.executing_eagerly():
from tensorflow.python.ops import variable_scope
# Have to use a variable store to reuse variables in eager mode
store = variable_scope.EagerVariableStore()
@@ -163,30 +163,32 @@ class DistributionalQTFModel(TFModelV2):
# Save the scope objects, since in eager we will execute this
# path repeatedly and there is no guarantee it will always be run
# in the same original scope.
with tf.variable_scope(name + "/action_value") as action_scope:
with tf1.variable_scope(name + "/action_value") as action_scope:
pass
with tf.variable_scope(name + "/state_value") as state_scope:
with tf1.variable_scope(name + "/state_value") as state_scope:
pass
def build_action_value_in_scope(model_out):
with store.as_default():
with tf.variable_scope(action_scope, reuse=tf.AUTO_REUSE):
with tf1.variable_scope(
action_scope, reuse=tf1.AUTO_REUSE):
return build_action_value(model_out)
def build_state_score_in_scope(model_out):
with store.as_default():
with tf.variable_scope(state_scope, reuse=tf.AUTO_REUSE):
with tf1.variable_scope(
state_scope, reuse=tf1.AUTO_REUSE):
return build_state_score(model_out)
else:
def build_action_value_in_scope(model_out):
with tf.variable_scope(
name + "/action_value", reuse=tf.AUTO_REUSE):
with tf1.variable_scope(
name + "/action_value", reuse=tf1.AUTO_REUSE):
return build_action_value(model_out)
def build_state_score_in_scope(model_out):
with tf.variable_scope(
name + "/state_value", reuse=tf.AUTO_REUSE):
with tf1.variable_scope(
name + "/state_value", reuse=tf1.AUTO_REUSE):
return build_state_score(model_out)
q_out = build_action_value_in_scope(self.model_out)
@@ -241,33 +243,33 @@ class DistributionalQTFModel(TFModelV2):
epsilon_w = tf.matmul(
a=tf.expand_dims(epsilon_in, -1), b=tf.expand_dims(epsilon_out, 0))
epsilon_b = epsilon_out
sigma_w = tf.get_variable(
sigma_w = tf1.get_variable(
name=prefix + "_sigma_w",
shape=[in_size, out_size],
dtype=tf.float32,
initializer=tf.random_uniform_initializer(
initializer=tf1.random_uniform_initializer(
minval=-1.0 / np.sqrt(float(in_size)),
maxval=1.0 / np.sqrt(float(in_size))))
# TF noise generation can be unreliable on GPU
# If generating the noise on the CPU,
# lowering sigma0 to 0.1 may be helpful
sigma_b = tf.get_variable(
sigma_b = tf1.get_variable(
name=prefix + "_sigma_b",
shape=[out_size],
dtype=tf.float32, # 0.5~GPU, 0.1~CPU
initializer=tf.constant_initializer(
initializer=tf1.constant_initializer(
sigma0 / np.sqrt(float(in_size))))
w = tf.get_variable(
w = tf1.get_variable(
name=prefix + "_fc_w",
shape=[in_size, out_size],
dtype=tf.float32,
initializer=tf.initializers.glorot_uniform())
b = tf.get_variable(
initializer=tf.initializers.GlorotUniform())
b = tf1.get_variable(
name=prefix + "_fc_b",
shape=[out_size],
dtype=tf.float32,
initializer=tf.zeros_initializer())
initializer=tf.initializers.Zeros())
action_activation = \
tf.keras.layers.Lambda(lambda x: tf.matmul(
+2 -2
View File
@@ -17,7 +17,7 @@ from ray.rllib.utils.tf_ops import huber_loss, reduce_mean_ignore_inf, \
minimize_and_clip
from ray.rllib.utils.tf_ops import make_tf_callable
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
Q_SCOPE = "q_func"
Q_TARGET_SCOPE = "target_q_func"
@@ -253,7 +253,7 @@ def build_q_losses(policy, model, _, train_batch):
def adam_optimizer(policy, config):
return tf.train.AdamOptimizer(
return tf1.train.AdamOptimizer(
learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"])
+1 -1
View File
@@ -1,7 +1,7 @@
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class SimpleQModel(TFModelV2):
+1 -1
View File
@@ -15,7 +15,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.tf_ops import huber_loss, make_tf_callable
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
Q_SCOPE = "q_func"
+1 -1
View File
@@ -7,7 +7,7 @@ from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check, check_compute_single_action, \
framework_iterator
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class TestDQN(unittest.TestCase):
+1 -1
View File
@@ -11,7 +11,7 @@ from ray.rllib.utils.numpy import fc, one_hot, huber_loss
from ray.rllib.utils.test_utils import check, check_compute_single_action, \
framework_iterator
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class TestSimpleQ(unittest.TestCase):
+7 -7
View File
@@ -14,7 +14,7 @@ from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space, \
unbatch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
tree = try_import_tree()
@@ -60,9 +60,9 @@ def rollout(policy, env, timestep_limit=None, add_noise=False, offset=0.0):
def make_session(single_threaded):
if not single_threaded:
return tf.Session()
return tf.Session(
config=tf.ConfigProto(
return tf1.Session()
return tf1.Session(
config=tf1.ConfigProto(
inter_op_parallelism_threads=1, intra_op_parallelism_threads=1))
@@ -77,8 +77,8 @@ class ESTFPolicy:
self.preprocessor.shape)
self.single_threaded = config.get("single_threaded", False)
self.sess = make_session(single_threaded=self.single_threaded)
self.inputs = tf.placeholder(tf.float32,
[None] + list(self.preprocessor.shape))
self.inputs = tf1.placeholder(tf.float32,
[None] + list(self.preprocessor.shape))
# Policy network.
dist_class, dist_dim = ModelCatalog.get_action_dist(
@@ -98,7 +98,7 @@ class ESTFPolicy:
self.num_params = sum(
np.prod(variable.shape.as_list())
for _, variable in self.variables.variables.items())
self.sess.run(tf.global_variables_initializer())
self.sess.run(tf1.global_variables_initializer())
def compute_actions(self,
observation,
+1 -1
View File
@@ -6,7 +6,7 @@ from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_compute_single_action, \
framework_iterator
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class TestIMPALA(unittest.TestCase):
+18 -18
View File
@@ -30,7 +30,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.numpy import softmax
from ray.rllib.utils.test_utils import check, framework_iterator
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
@@ -185,20 +185,20 @@ class VtraceTest(unittest.TestCase):
# can deal with that.
inputs_ = {
# T, B, NUM_ACTIONS
"behaviour_policy_logits": tf.placeholder(
"behaviour_policy_logits": tf1.placeholder(
dtype=tf.float32, shape=[None, None, None]),
# T, B, NUM_ACTIONS
"target_policy_logits": tf.placeholder(
"target_policy_logits": tf1.placeholder(
dtype=tf.float32, shape=[None, None, None]),
"actions": tf.placeholder(
"actions": tf1.placeholder(
dtype=tf.int32, shape=[None, None]),
"discounts": tf.placeholder(
"discounts": tf1.placeholder(
dtype=tf.float32, shape=[None, None]),
"rewards": tf.placeholder(
"rewards": tf1.placeholder(
dtype=tf.float32, shape=[None, None]),
"values": tf.placeholder(
"values": tf1.placeholder(
dtype=tf.float32, shape=[None, None]),
"bootstrap_value": tf.placeholder(
"bootstrap_value": tf1.placeholder(
dtype=tf.float32, shape=[None]),
}
else:
@@ -282,15 +282,15 @@ class VtraceTest(unittest.TestCase):
vtrace = vtrace_tf if fw != "torch" else vtrace_torch
if fw == "tf":
inputs_ = {
"log_rhos": tf.placeholder(
"log_rhos": tf1.placeholder(
dtype=tf.float32, shape=[None, None, 1]),
"discounts": tf.placeholder(
"discounts": tf1.placeholder(
dtype=tf.float32, shape=[None, None, 1]),
"rewards": tf.placeholder(
"rewards": tf1.placeholder(
dtype=tf.float32, shape=[None, None, 42]),
"values": tf.placeholder(
"values": tf1.placeholder(
dtype=tf.float32, shape=[None, None, 42]),
"bootstrap_value": tf.placeholder(
"bootstrap_value": tf1.placeholder(
dtype=tf.float32, shape=[None, 42])
}
else:
@@ -310,16 +310,16 @@ class VtraceTest(unittest.TestCase):
vtrace = vtrace_tf if fw != "torch" else vtrace_torch
if fw == "tf":
inputs_ = {
"log_rhos": tf.placeholder(
"log_rhos": tf1.placeholder(
dtype=tf.float32, shape=[None, None, 1]),
"discounts": tf.placeholder(
"discounts": tf1.placeholder(
dtype=tf.float32, shape=[None, None, 1]),
"rewards": tf.placeholder(
"rewards": tf1.placeholder(
dtype=tf.float32, shape=[None, None, 42]),
"values": tf.placeholder(
"values": tf1.placeholder(
dtype=tf.float32, shape=[None, None, 42]),
# Should be [None, 42].
"bootstrap_value": tf.placeholder(
"bootstrap_value": tf1.placeholder(
dtype=tf.float32, shape=[None])
}
else:
+8 -7
View File
@@ -33,7 +33,7 @@ import collections
from ray.rllib.models.tf.tf_action_dist import Categorical
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
VTraceFromLogitsReturns = collections.namedtuple("VTraceFromLogitsReturns", [
"vs", "pg_advantages", "log_rhos", "behaviour_action_log_probs",
@@ -222,7 +222,7 @@ def multi_from_logits(behaviour_policy_logits,
behaviour_policy_logits[i].shape.assert_has_rank(3)
target_policy_logits[i].shape.assert_has_rank(3)
with tf.name_scope(
with tf1.name_scope(
name,
values=[
behaviour_policy_logits, target_policy_logits, actions,
@@ -332,21 +332,22 @@ def from_importance_weights(log_rhos,
if clip_pg_rho_threshold is not None:
clip_pg_rho_threshold.shape.assert_has_rank(0)
with tf.name_scope(
with tf1.name_scope(
name,
values=[log_rhos, discounts, rewards, values, bootstrap_value]):
rhos = tf.exp(log_rhos)
rhos = tf.math.exp(log_rhos)
if clip_rho_threshold is not None:
clipped_rhos = tf.minimum(
clip_rho_threshold, rhos, name="clipped_rhos")
tf.summary.histogram("clipped_rhos_1000", tf.minimum(1000.0, rhos))
tf.summary.scalar(
tf1.summary.histogram(
"clipped_rhos_1000", tf.minimum(1000.0, rhos))
tf1.summary.scalar(
"num_of_clipped_rhos",
tf.reduce_sum(
tf.cast(
tf.equal(clipped_rhos, clip_rho_threshold), tf.int32)))
tf.summary.scalar("size_of_clipped_rhos", tf.size(clipped_rhos))
tf1.summary.scalar("size_of_clipped_rhos", tf.size(clipped_rhos))
else:
clipped_rhos = rhos
+5 -4
View File
@@ -16,7 +16,7 @@ from ray.rllib.policy.tf_policy import LearningRateSchedule, \
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.tf_ops import explained_variance
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -253,10 +253,11 @@ def postprocess_trajectory(policy,
def choose_optimizer(policy, config):
if policy.config["opt_type"] == "adam":
return tf.train.AdamOptimizer(policy.cur_lr)
return tf1.train.AdamOptimizer(policy.cur_lr)
else:
return tf.train.RMSPropOptimizer(policy.cur_lr, config["decay"],
config["momentum"], config["epsilon"])
return tf1.train.RMSPropOptimizer(
policy.cur_lr,
config["decay"], config["momentum"], config["epsilon"])
def clip_gradients(policy, optimizer, loss):
+13 -13
View File
@@ -9,7 +9,7 @@ from ray.rllib.agents.ppo.ppo_tf_policy import postprocess_ppo_gae, \
vf_preds_fetches, clip_gradients, setup_config, ValueNetworkMixin
from ray.rllib.utils.framework import get_activation_fn
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -33,7 +33,7 @@ def PPOLoss(dist_class,
pi_new_logp = curr_dist.logp(actions)
pi_old_logp = prev_dist.logp(actions)
logp_ratio = tf.exp(pi_new_logp - pi_old_logp)
logp_ratio = tf.math.exp(pi_new_logp - pi_old_logp)
if clip_loss:
return tf.minimum(
advantages * logp_ratio,
@@ -49,10 +49,10 @@ def PPOLoss(dist_class,
def vf_loss(value_fn, value_targets, vf_preds, vf_clip_param=0.1):
# GAE Value Function Loss
vf_loss1 = tf.square(value_fn - value_targets)
vf_loss1 = tf.math.square(value_fn - value_targets)
vf_clipped = vf_preds + tf.clip_by_value(value_fn - vf_preds,
-vf_clip_param, vf_clip_param)
vf_loss2 = tf.square(vf_clipped - value_targets)
vf_loss2 = tf.math.square(vf_clipped - value_targets)
vf_loss = tf.maximum(vf_loss1, vf_loss2)
return vf_loss
@@ -104,7 +104,7 @@ class WorkerLoss(object):
vf_clip_param=vf_clip_param,
vf_loss_coeff=vf_loss_coeff,
clip_loss=clip_loss)
self.loss = tf.Print(self.loss, ["Worker Adapt Loss", self.loss])
self.loss = tf1.Print(self.loss, ["Worker Adapt Loss", self.loss])
# This is the Meta-Update computation graph for main (meta-update step)
@@ -230,7 +230,7 @@ class MAMLLoss(object):
tf.multiply(self.cur_kl_coeff, mean_inner_kl))
self.loss = tf.reduce_mean(tf.stack(ppo_obj,
axis=0)) + self.inner_kl_loss
self.loss = tf.Print(
self.loss = tf1.Print(
self.loss,
["Meta-Loss", self.loss, "Inner KL", self.mean_inner_kl])
@@ -309,7 +309,7 @@ class MAMLLoss(object):
def maml_loss(policy, model, dist_class, train_batch):
logits, state = model.from_batch(train_batch)
policy._loss_input_dict["split"] = tf.placeholder(
policy._loss_input_dict["split"] = tf1.placeholder(
tf.int32,
name="Meta-Update-Splitting",
shape=(policy.config["inner_adaptation_steps"] + 1,
@@ -333,8 +333,8 @@ def maml_loss(policy, model, dist_class, train_batch):
vf_loss_coeff=policy.config["vf_loss_coeff"],
clip_loss=False)
else:
policy.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
tf.get_variable_scope().name)
policy.var_list = tf1.get_collection(tf1.GraphKeys.TRAINABLE_VARIABLES,
tf1.get_variable_scope().name)
policy.loss_obj = MAMLLoss(
model=model,
dist_class=dist_class,
@@ -380,8 +380,8 @@ class KLCoeffMixin:
self.kl_coeff_val = [config["kl_coeff"]
] * config["inner_adaptation_steps"]
self.kl_target = self.config["kl_target"]
self.kl_coeff = tf.get_variable(
initializer=tf.constant_initializer(self.kl_coeff_val),
self.kl_coeff = tf1.get_variable(
initializer=tf.keras.initializers.Constant(self.kl_coeff_val),
name="kl_coeff",
shape=(config["inner_adaptation_steps"]),
trainable=False,
@@ -404,8 +404,8 @@ def maml_optimizer_fn(policy, config):
Meta-Policy uses Adam optimizer for meta-update
"""
if not config["worker_index"]:
return tf.train.AdamOptimizer(learning_rate=config["lr"])
return tf.train.GradientDescentOptimizer(learning_rate=config["inner_lr"])
return tf1.train.AdamOptimizer(learning_rate=config["lr"])
return tf1.train.GradientDescentOptimizer(learning_rate=config["inner_lr"])
def setup_mixins(policy, obs_space, action_space, config):
+4 -4
View File
@@ -6,7 +6,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class ValueNetworkMixin:
@@ -37,13 +37,13 @@ class ReweightedImitationLoss:
# advantage estimation
adv = cumulative_rewards - state_values
# update averaged advantage norm
update_adv_norm = tf.assign_add(
update_adv_norm = tf1.assign_add(
ref=policy._ma_adv_norm,
value=1e-6 * (
tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
# exponentially weighted advantages
with tf.control_dependencies([update_adv_norm]):
with tf1.control_dependencies([update_adv_norm]):
exp_advs = tf.math.exp(beta * tf.math.divide(
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
@@ -125,7 +125,7 @@ def setup_mixins(policy, obs_space, action_space, config):
ValueNetworkMixin.__init__(policy)
# Set up a tf-var for the moving avg (do this here to make it work with
# eager mode).
policy._ma_adv_norm = tf.get_variable(
policy._ma_adv_norm = tf1.get_variable(
name="moving_average_of_advantage_norm",
dtype=tf.float32,
initializer=100.0,
+1 -1
View File
@@ -6,7 +6,7 @@ from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_compute_single_action, \
framework_iterator
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class TestMARWIL(unittest.TestCase):
+1 -1
View File
@@ -5,7 +5,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
def post_process_advantages(policy,
+3 -3
View File
@@ -21,7 +21,7 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
POLICY_SCOPE = "func"
TARGET_POLICY_SCOPE = "target_func"
@@ -65,7 +65,7 @@ class PPOSurrogateLoss:
def reduce_mean_valid(t):
return tf.reduce_mean(tf.boolean_mask(t, valid_mask))
logp_ratio = tf.exp(actions_logp - prev_actions_logp)
logp_ratio = tf.math.exp(actions_logp - prev_actions_logp)
surrogate_loss = tf.minimum(
advantages * logp_ratio,
@@ -170,7 +170,7 @@ class VTraceSurrogateLoss:
tf.float32))
self.is_ratio = tf.clip_by_value(
tf.exp(prev_actions_logp - old_policy_actions_logp), 0.0, 2.0)
tf.math.exp(prev_actions_logp - old_policy_actions_logp), 0.0, 2.0)
logp_ratio = self.is_ratio * tf.exp(actions_logp - prev_actions_logp)
advantages = self.vtrace_returns.pg_advantages
-3
View File
@@ -7,9 +7,6 @@ from ray.rllib.execution.rollout_ops import ParallelRollouts, ConcatBatches, \
StandardizeFields, SelectExperiences
from ray.rllib.execution.train_ops import TrainOneStep, TrainTFMultiGPU
from ray.rllib.execution.metric_ops import StandardMetricsReporting
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
logger = logging.getLogger(__name__)
+3 -3
View File
@@ -10,7 +10,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -174,7 +174,7 @@ def postprocess_ppo_gae(policy,
else:
next_state = []
for i in range(policy.num_state_tensors()):
next_state.append([sample_batch["state_out_{}".format(i)][-1]])
next_state.append(sample_batch["state_out_{}".format(i)][-1])
last_r = policy._value(sample_batch[SampleBatch.NEXT_OBS][-1],
sample_batch[SampleBatch.ACTIONS][-1],
sample_batch[SampleBatch.REWARDS][-1],
@@ -206,7 +206,7 @@ class KLCoeffMixin:
# KL Coefficient
self.kl_coeff_val = config["kl_coeff"]
self.kl_target = config["kl_target"]
self.kl_coeff = tf.get_variable(
self.kl_coeff = tf1.get_variable(
initializer=tf.constant_initializer(self.kl_coeff_val),
name="kl_coeff",
shape=(),
+1 -1
View File
@@ -194,7 +194,7 @@ class ValueNetworkMixin:
SampleBatch.PREV_REWARDS: convert_to_torch_tensor(
np.asarray([prev_reward])),
"is_training": False,
}, [convert_to_torch_tensor(np.asarray(s)) for s in state],
}, [convert_to_torch_tensor(np.asarray([s])) for s in state],
convert_to_torch_tensor(np.asarray([1])))
return self.model.value_function()[0]
-3
View File
@@ -2,12 +2,9 @@ import unittest
import ray
import ray.rllib.agents.ppo as ppo
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_compute_single_action, \
framework_iterator
tf = try_import_tf()
class TestAPPO(unittest.TestCase):
@classmethod
-3
View File
@@ -2,12 +2,9 @@ import unittest
import ray
import ray.rllib.agents.ppo as ppo
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_compute_single_action, \
framework_iterator
tf = try_import_tf()
class TestDDPPO(unittest.TestCase):
@classmethod
+1 -3
View File
@@ -13,12 +13,10 @@ from ray.rllib.models.tf.tf_action_dist import Categorical
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.torch_action_dist import TorchCategorical
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.numpy import fc
from ray.rllib.utils.test_utils import check, framework_iterator, \
check_compute_single_action
tf = try_import_tf()
# Fake CartPole episode of n time steps.
FAKE_BATCH = {
@@ -40,7 +38,7 @@ FAKE_BATCH = {
class TestPPO(unittest.TestCase):
@classmethod
def setUpClass(cls):
ray.init()
ray.init(local_mode=True)
@classmethod
def tearDownClass(cls):
+1 -1
View File
@@ -4,7 +4,7 @@ import numpy as np
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class SACTFModel(TFModelV2):
+14 -14
View File
@@ -17,7 +17,7 @@ from ray.rllib.utils.error import UnsupportedSpaceException
from ray.rllib.utils.framework import try_import_tf, try_import_tfp
from ray.rllib.utils.tf_ops import minimize_and_clip
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
tfp = try_import_tfp()
logger = logging.getLogger(__name__)
@@ -138,10 +138,10 @@ def sac_actor_critic_loss(policy, model, _, train_batch):
if model.discrete:
# Get all action probs directly from pi and form their logp.
log_pis_t = tf.nn.log_softmax(model.get_policy_output(model_out_t), -1)
policy_t = tf.exp(log_pis_t)
policy_t = tf.math.exp(log_pis_t)
log_pis_tp1 = tf.nn.log_softmax(
model.get_policy_output(model_out_tp1), -1)
policy_tp1 = tf.exp(log_pis_tp1)
policy_tp1 = tf.math.exp(log_pis_tp1)
# Q-values.
q_t = model.get_q_values(model_out_t)
# Target Q-values.
@@ -219,20 +219,20 @@ def sac_actor_critic_loss(policy, model, _, train_batch):
policy.config["gamma"]**policy.config["n_step"] * q_tp1_best_masked)
# Compute the TD-error (potentially clipped).
base_td_error = tf.abs(q_t_selected - q_t_selected_target)
base_td_error = tf.math.abs(q_t_selected - q_t_selected_target)
if policy.config["twin_q"]:
twin_td_error = tf.abs(twin_q_t_selected - q_t_selected_target)
twin_td_error = tf.math.abs(twin_q_t_selected - q_t_selected_target)
td_error = 0.5 * (base_td_error + twin_td_error)
else:
td_error = base_td_error
critic_loss = [
tf.losses.mean_squared_error(
tf1.losses.mean_squared_error(
labels=q_t_selected_target, predictions=q_t_selected, weights=0.5)
]
if policy.config["twin_q"]:
critic_loss.append(
tf.losses.mean_squared_error(
tf1.losses.mean_squared_error(
labels=q_t_selected_target,
predictions=twin_q_t_selected,
weights=0.5))
@@ -274,7 +274,7 @@ def sac_actor_critic_loss(policy, model, _, train_batch):
# in a custom apply op we handle the losses separately, but return them
# combined in one loss for now
return actor_loss + tf.add_n(critic_loss) + alpha_loss
return actor_loss + tf.math.add_n(critic_loss) + alpha_loss
def gradients(policy, optimizer, loss):
@@ -358,7 +358,7 @@ def apply_gradients(policy, optimizer, grads_and_vars):
alpha_apply_ops = policy._alpha_optimizer.apply_gradients(
policy._alpha_grads_and_vars,
global_step=tf.train.get_or_create_global_step())
global_step=tf1.train.get_or_create_global_step())
return tf.group([actor_apply_ops, alpha_apply_ops] + critic_apply_ops)
@@ -381,20 +381,20 @@ def stats(policy, train_batch):
class ActorCriticOptimizerMixin:
def __init__(self, config):
# create global step for counting the number of update operations
self.global_step = tf.train.get_or_create_global_step()
self.global_step = tf1.train.get_or_create_global_step()
# use separate optimizers for actor & critic
self._actor_optimizer = tf.train.AdamOptimizer(
self._actor_optimizer = tf1.train.AdamOptimizer(
learning_rate=config["optimization"]["actor_learning_rate"])
self._critic_optimizer = [
tf.train.AdamOptimizer(
tf1.train.AdamOptimizer(
learning_rate=config["optimization"]["critic_learning_rate"])
]
if config["twin_q"]:
self._critic_optimizer.append(
tf.train.AdamOptimizer(learning_rate=config["optimization"][
tf1.train.AdamOptimizer(learning_rate=config["optimization"][
"critic_learning_rate"]))
self._alpha_optimizer = tf.train.AdamOptimizer(
self._alpha_optimizer = tf1.train.AdamOptimizer(
learning_rate=config["optimization"]["entropy_learning_rate"])
+1 -2
View File
@@ -11,13 +11,12 @@ from ray.rllib.models.tf.tf_action_dist import SquashedGaussian
from ray.rllib.models.torch.torch_action_dist import TorchSquashedGaussian
from ray.rllib.execution.replay_buffer import LocalReplayBuffer
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.numpy import fc, relu
from ray.rllib.utils.test_utils import check, check_compute_single_action, \
framework_iterator
from ray.rllib.utils.torch_ops import convert_to_torch_tensor
tf = try_import_tf()
torch, _ = try_import_torch()
+7 -7
View File
@@ -35,7 +35,7 @@ from ray.tune.resources import Resources
from ray.tune.logger import Logger, UnifiedLogger
from ray.tune.result import DEFAULT_RESULTS_DIR
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -595,12 +595,12 @@ class Trainer(Trainable):
self.config.pop("eager")
# Enable eager/tracing support.
if tf and self.config["framework"] == "tfe":
if not tf.executing_eagerly():
tf.enable_eager_execution()
if tf1 and self.config["framework"] == "tfe":
if not tf1.executing_eagerly():
tf1.enable_eager_execution()
logger.info("Executing eagerly, with eager_tracing={}".format(
self.config["eager_tracing"]))
if tf and not tf.executing_eagerly() and \
if tf1 and not tf1.executing_eagerly() and \
self.config["framework"] != "torch":
logger.info("Tip: set framework=tfe or the --eager flag to enable "
"TensorFlow eager execution")
@@ -634,8 +634,8 @@ class Trainer(Trainable):
logging.getLogger("ray.rllib").setLevel(self.config["log_level"])
def get_scope():
if tf and not tf.executing_eagerly():
return tf.Graph().as_default()
if tf1 and not tf1.executing_eagerly():
return tf1.Graph().as_default()
else:
return open(os.devnull) # fake a no-op scope
@@ -12,14 +12,13 @@ from ray.rllib.execution.metric_ops import StandardMetricsReporting
from ray.rllib.models.catalog import ModelCatalog
from ray.rllib.models.model import restore_original_dimensions
from ray.rllib.models.torch.torch_action_dist import TorchCategorical
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.framework import try_import_torch
from ray.tune.registry import ENV_CREATOR, _global_registry
from ray.rllib.contrib.alpha_zero.core.alpha_zero_policy import AlphaZeroPolicy
from ray.rllib.contrib.alpha_zero.core.mcts import MCTS
from ray.rllib.contrib.alpha_zero.core.ranked_rewards import get_r2_env_wrapper
tf = try_import_tf()
torch, nn = try_import_torch()
logger = logging.getLogger(__name__)
+23 -21
View File
@@ -15,7 +15,7 @@ import numpy as np
logger = logging.getLogger(__name__)
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
tfp = try_import_tfp()
@@ -49,7 +49,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
# _____ Initial Configuration
config = dict(ray.rllib.contrib.maddpg.DEFAULT_CONFIG, **config)
self.config = config
self.global_step = tf.train.get_or_create_global_step()
self.global_step = tf1.train.get_or_create_global_step()
# FIXME: Getting done from info is required since agent-wise done is not
# supported now.
@@ -88,7 +88,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
# Placeholders for policy evaluation and updates
def _make_ph_n(space_n, name=""):
return [
tf.placeholder(
tf1.placeholder(
tf.float32,
shape=(None, ) + space.shape,
name=name + "_%d" % i) for i, space in enumerate(space_n)
@@ -98,9 +98,9 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
act_ph_n = _make_ph_n(act_space_n, "actions")
new_obs_ph_n = _make_ph_n(obs_space_n, "new_obs")
new_act_ph_n = _make_ph_n(act_space_n, "new_actions")
rew_ph = tf.placeholder(
rew_ph = tf1.placeholder(
tf.float32, shape=None, name="rewards_{}".format(agent_id))
done_ph = tf.placeholder(
done_ph = tf1.placeholder(
tf.float32, shape=None, name="dones_{}".format(agent_id))
if config["use_local_critic"]:
@@ -190,12 +190,12 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
# _____ Optimizers
self.optimizers = {
"critic": tf.train.AdamOptimizer(config["critic_lr"]),
"actor": tf.train.AdamOptimizer(config["actor_lr"])
"critic": tf1.train.AdamOptimizer(config["critic_lr"]),
"actor": tf1.train.AdamOptimizer(config["actor_lr"])
}
# _____ Build variable update ops.
self.tau = tf.placeholder_with_default(
self.tau = tf1.placeholder_with_default(
config["tau"], shape=(), name="tau")
def _make_target_update_op(vs, target_vs, tau):
@@ -213,7 +213,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
for v in variables.values():
vs += v
phs = [
tf.placeholder(
tf1.placeholder(
tf.float32,
shape=v.get_shape(),
name=v.name.split(":")[0] + "_ph") for v in vs
@@ -230,7 +230,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
# _____ TensorFlow Initialization
self.sess = tf.get_default_session()
self.sess = tf1.get_default_session()
def _make_loss_inputs(placeholders):
return [(ph.name.split("/")[-1].split(":")[0], ph)
@@ -251,7 +251,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
loss_inputs=loss_inputs,
dist_inputs=actor_feature)
self.sess.run(tf.global_variables_initializer())
self.sess.run(tf1.global_variables_initializer())
# Hard initial update
self.update_target(1.0)
@@ -280,8 +280,8 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
critic_apply_op = self.optimizers["critic"].apply_gradients(
self.gvs["critic"])
with tf.control_dependencies([tf.assign_add(self.global_step, 1)]):
with tf.control_dependencies([critic_apply_op]):
with tf1.control_dependencies([tf1.assign_add(self.global_step, 1)]):
with tf1.control_dependencies([critic_apply_op]):
actor_apply_op = self.optimizers["actor"].apply_gradients(
self.gvs["actor"])
@@ -324,7 +324,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
hiddens,
activation=None,
scope=None):
with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope:
with tf1.variable_scope(scope, reuse=tf1.AUTO_REUSE) as scope:
if use_state_preprocessor:
model_n = [
ModelCatalog.get_model({
@@ -341,11 +341,12 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
out = tf.concat(obs_n + act_n, axis=1)
for hidden in hiddens:
out = tf.layers.dense(out, units=hidden, activation=activation)
out = tf1.layers.dense(
out, units=hidden, activation=activation)
feature = out
out = tf.layers.dense(feature, units=1, activation=None)
out = tf1.layers.dense(feature, units=1, activation=None)
return out, feature, model_n, tf.global_variables(scope.name)
return out, feature, model_n, tf1.global_variables(scope.name)
def _build_actor_network(self,
obs,
@@ -355,7 +356,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
hiddens,
activation=None,
scope=None):
with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope:
with tf1.variable_scope(scope, reuse=tf1.AUTO_REUSE) as scope:
if use_state_preprocessor:
model = ModelCatalog.get_model({
"obs": obs,
@@ -367,13 +368,14 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
out = obs
for hidden in hiddens:
out = tf.layers.dense(out, units=hidden, activation=activation)
feature = tf.layers.dense(
out = tf1.layers.dense(
out, units=hidden, activation=activation)
feature = tf1.layers.dense(
out, units=act_space.shape[0], activation=None)
sampler = tfp.distributions.RelaxedOneHotCategorical(
temperature=1.0, logits=feature).sample()
return sampler, feature, model, tf.global_variables(scope.name)
return sampler, feature, model, tf1.global_variables(scope.name)
def update_target(self, tau=None):
if tau is not None:
+16 -16
View File
@@ -50,7 +50,7 @@ if TYPE_CHECKING:
# Generic type var for foreach_* methods.
T = TypeVar("T")
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()
logger = logging.getLogger(__name__)
@@ -283,12 +283,12 @@ class RolloutWorker(ParallelIteratorWorker):
ParallelIteratorWorker.__init__(self, gen_rollouts, False)
policy_config: TrainerConfigDict = policy_config or {}
if (tf and policy_config.get("framework") == "tfe"
if (tf1 and policy_config.get("framework") == "tfe"
and not policy_config.get("no_eager_on_workers")
# This eager check is necessary for certain all-framework tests
# that use tf's eager_mode() context generator.
and not tf.executing_eagerly()):
tf.enable_eager_execution()
and not tf1.executing_eagerly()):
tf1.enable_eager_execution()
if log_level:
logging.getLogger("ray.rllib").setLevel(log_level)
@@ -382,21 +382,21 @@ class RolloutWorker(ParallelIteratorWorker):
torch.manual_seed(seed)
except AssertionError:
logger.info("Could not seed torch")
if _has_tensorflow_graph(policy_dict) and not (tf and
tf.executing_eagerly()):
if not tf:
if _has_tensorflow_graph(policy_dict) and not (
tf1 and tf1.executing_eagerly()):
if not tf1:
raise ImportError("Could not import tensorflow")
with tf.Graph().as_default():
with tf1.Graph().as_default():
if tf_session_creator:
self.tf_sess = tf_session_creator()
else:
self.tf_sess = tf.Session(
config=tf.ConfigProto(
gpu_options=tf.GPUOptions(allow_growth=True)))
self.tf_sess = tf1.Session(
config=tf1.ConfigProto(
gpu_options=tf1.GPUOptions(allow_growth=True)))
with self.tf_sess.as_default():
# set graph-level seed
if seed is not None:
tf.set_random_seed(seed)
tf1.set_random_seed(seed)
self.policy_map, self.preprocessors = \
self._build_policy_map(policy_dict, policy_config)
if (ray.is_initialized()
@@ -406,7 +406,7 @@ class RolloutWorker(ParallelIteratorWorker):
"Creating policy evaluation worker {}".format(
worker_index) +
" on CPU (please ignore any CUDA init errors)")
elif not tf.test.is_gpu_available():
elif not tf1.test.is_gpu_available():
raise RuntimeError(
"GPUs were assigned to this worker by Ray, but "
"TensorFlow reports GPU acceleration is disabled. "
@@ -956,7 +956,7 @@ class RolloutWorker(ParallelIteratorWorker):
"Found raw Tuple|Dict space as input to policy. "
"Please preprocess these observations with a "
"Tuple|DictFlatteningPreprocessor.")
if tf and tf.executing_eagerly():
if tf1 and tf1.executing_eagerly():
if hasattr(cls, "as_eager"):
cls = cls.as_eager()
if policy_config["eager_tracing"]:
@@ -966,8 +966,8 @@ class RolloutWorker(ParallelIteratorWorker):
else:
raise ValueError("This policy does not support eager "
"execution: {}".format(cls))
if tf:
with tf.variable_scope(name):
if tf1:
with tf1.variable_scope(name):
policy_map[name] = cls(obs_space, act_space, merged_conf)
else:
policy_map[name] = cls(obs_space, act_space, merged_conf)
+3 -3
View File
@@ -14,7 +14,7 @@ from ray.rllib.utils import merge_dicts
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.types import PolicyID, TrainerConfigDict, EnvType
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -202,8 +202,8 @@ class WorkerSet:
def session_creator():
logger.debug("Creating TF session {}".format(
config["tf_session_args"]))
return tf.Session(
config=tf.ConfigProto(**config["tf_session_args"]))
return tf1.Session(
config=tf1.ConfigProto(**config["tf_session_args"]))
if isinstance(config["input"], FunctionType):
input_creator = config["input"]
+1 -1
View File
@@ -11,7 +11,7 @@ from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.tune import registry
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, default="PPO")
+1 -1
View File
@@ -4,7 +4,7 @@ import numpy as np
from rllib.models.tf.attention_net import TrXLNet
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
def bit_shift_generator(seq_length, shift, batch_size):
+1 -1
View File
@@ -10,7 +10,7 @@ from ray.rllib.models import ModelCatalog
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_learning_achieved
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, default="PPO")
+1 -1
View File
@@ -39,7 +39,7 @@ from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
from ray.rllib.utils.torch_ops import convert_to_torch_tensor
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
OPPONENT_OBS = "opponent_obs"
+1 -1
View File
@@ -23,7 +23,7 @@ from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.test_utils import check_learning_achieved
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
parser = argparse.ArgumentParser()
+1 -1
View File
@@ -12,7 +12,7 @@ from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.tf.visionnet import VisionNetwork as MyVisionNetwork
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, default="DQN") # Try PG, PPO, DQN
+1 -1
View File
@@ -21,7 +21,7 @@ from ray.rllib.examples.models.custom_loss_model import CustomLossModel, \
from ray.rllib.models import ModelCatalog
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
parser = argparse.ArgumentParser()
parser.add_argument("--torch", action="store_true")
+1 -1
View File
@@ -7,7 +7,7 @@ from ray.rllib.evaluation.postprocessing import discount
from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
parser = argparse.ArgumentParser()
parser.add_argument("--stop-iters", type=int, default=200)
+1 -1
View File
@@ -11,7 +11,7 @@ from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_learning_achieved
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
parser = argparse.ArgumentParser()
parser.add_argument("--stop-iters", type=int, default=200)
+10 -10
View File
@@ -6,7 +6,7 @@ import ray
from ray.rllib.agents.registry import get_agent_class
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
ray.init(num_cpus=10)
@@ -25,14 +25,14 @@ def train_and_export(algo_name, num_steps, model_dir, ckpt_dir, prefix):
def restore_saved_model(export_dir):
signature_key = \
tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
g = tf.Graph()
tf1.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
g = tf1.Graph()
with g.as_default():
with tf.Session(graph=g) as sess:
with tf1.Session(graph=g) as sess:
meta_graph_def = \
tf.saved_model.load(sess,
[tf.saved_model.tag_constants.SERVING],
export_dir)
tf1.saved_model.load(sess,
[tf1.saved_model.tag_constants.SERVING],
export_dir)
print("Model restored!")
print("Signature Def Information:")
print(meta_graph_def.signature_def[signature_key])
@@ -41,13 +41,13 @@ def restore_saved_model(export_dir):
def restore_checkpoint(export_dir, prefix):
sess = tf.Session()
sess = tf1.Session()
meta_file = "%s.meta" % prefix
saver = tf.train.import_meta_graph(os.path.join(export_dir, meta_file))
saver = tf1.train.import_meta_graph(os.path.join(export_dir, meta_file))
saver.restore(sess, os.path.join(export_dir, prefix))
print("Checkpoint restored!")
print("Variables Information:")
for v in tf.trainable_variables():
for v in tf1.trainable_variables():
value = sess.run(v)
print(v.name, value)
+1 -1
View File
@@ -13,7 +13,7 @@ from ray.rllib.examples.models.mobilenet_v2_with_lstm_models import \
from ray.rllib.models import ModelCatalog
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
cnn_shape = (4, 4, 3)
# The torch version of MobileNetV2 does channels first.
@@ -3,7 +3,7 @@ from ray.rllib.models.torch.torch_action_dist import TorchCategorical, \
TorchDistributionWrapper
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
@@ -7,7 +7,7 @@ from ray.rllib.models.torch.misc import SlimFC
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
+8 -8
View File
@@ -9,7 +9,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
@@ -39,27 +39,27 @@ class BatchNormModel(TFModelV2):
def forward(self, input_dict, state, seq_lens):
last_layer = input_dict["obs"]
hiddens = [256, 256]
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
with tf1.variable_scope("model", reuse=tf1.AUTO_REUSE):
for i, size in enumerate(hiddens):
last_layer = tf.layers.dense(
last_layer = tf1.layers.dense(
last_layer,
size,
kernel_initializer=normc_initializer(1.0),
activation=tf.nn.tanh,
name="fc{}".format(i))
# Add a batch norm layer
last_layer = tf.layers.batch_normalization(
last_layer = tf1.layers.batch_normalization(
last_layer,
training=input_dict["is_training"],
name="bn_{}".format(i))
output = tf.layers.dense(
output = tf1.layers.dense(
last_layer,
self.num_outputs,
kernel_initializer=normc_initializer(0.01),
activation=None,
name="out")
self._value_out = tf.layers.dense(
self._value_out = tf1.layers.dense(
last_layer,
1,
kernel_initializer=normc_initializer(1.0),
@@ -67,8 +67,8 @@ class BatchNormModel(TFModelV2):
name="vf")
if not self._registered:
self.register_variables(
tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+"))
tf1.get_collection(
tf1.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+"))
self._registered = True
return output, []
@@ -9,7 +9,7 @@ from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
+2 -2
View File
@@ -10,7 +10,7 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.offline import JsonReader
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
@@ -73,7 +73,7 @@ class DeprecatedCustomLossModelV1(Model):
def _build_layers_v2(self, input_dict, num_outputs, options):
self.obs_in = input_dict["obs"]
with tf.variable_scope("shared", reuse=tf.AUTO_REUSE):
with tf1.variable_scope("shared", reuse=tf1.AUTO_REUSE):
self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space,
self.action_space, num_outputs,
options)
+2 -2
View File
@@ -6,7 +6,7 @@ from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class EagerModel(TFModelV2):
@@ -34,7 +34,7 @@ class EagerModel(TFModelV2):
def lambda_(x):
eager_out = tf.py_function(self.forward_eager, [x], tf.float32)
with tf.control_dependencies([eager_out]):
with tf1.control_dependencies([eager_out]):
eager_out.set_shape(x.shape)
return eager_out
+6 -6
View File
@@ -5,7 +5,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
@@ -25,11 +25,11 @@ class FastModel(TFModelV2):
@override(ModelV2)
def forward(self, input_dict, state, seq_lens):
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
bias = tf.get_variable(
with tf1.variable_scope("model", reuse=tf1.AUTO_REUSE):
bias = tf1.get_variable(
dtype=tf.float32,
name="bias",
initializer=tf.zeros_initializer,
initializer=tf.keras.initializers.Zeros(),
shape=())
output = bias + \
tf.zeros([tf.shape(input_dict["obs"])[0], self.num_outputs])
@@ -37,8 +37,8 @@ class FastModel(TFModelV2):
if not self._registered:
self.register_variables(
tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+"))
tf1.get_collection(
tf1.GraphKeys.TRAINABLE_VARIABLES, scope=".+/model/.+"))
self._registered = True
return output, []
@@ -7,7 +7,7 @@ from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
@@ -9,7 +9,7 @@ from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.numpy import LARGE_INTEGER
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
+1 -1
View File
@@ -7,7 +7,7 @@ from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
+9 -6
View File
@@ -8,13 +8,15 @@ from ray.rllib.models.tf.recurrent_net import RecurrentNetwork
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class SpyLayer(tf.keras.layers.Layer):
"""A keras Layer, which intercepts its inputs and stored them as pickled.
"""
output = np.array(0, dtype=np.int64)
def __init__(self, num_outputs, **kwargs):
super().__init__(**kwargs)
@@ -26,7 +28,7 @@ class SpyLayer(tf.keras.layers.Layer):
"""
del kwargs
spy_fn = tf.py_func(
spy_fn = tf1.py_func(
self.spy,
[
inputs[0], # observations
@@ -36,11 +38,11 @@ class SpyLayer(tf.keras.layers.Layer):
inputs[5], # h_out
inputs[6], # c_out
],
tf.int64,
tf.int64, # Must match SpyLayer.output's type.
stateful=True)
# Compute outputs
with tf.control_dependencies([spy_fn]):
with tf1.control_dependencies([spy_fn]):
return self.dense(inputs[1])
@staticmethod
@@ -48,7 +50,8 @@ class SpyLayer(tf.keras.layers.Layer):
"""The actual spy operation: Store inputs in internal_kv."""
if len(inputs) == 1:
return 0 # don't capture inference inputs
# don't capture inference inputs
return SpyLayer.output
# TF runs this function in an isolated context, so we have to use
# redis to communicate back to our suite
ray.experimental.internal_kv._internal_kv_put(
@@ -61,7 +64,7 @@ class SpyLayer(tf.keras.layers.Layer):
}),
overwrite=True)
RNNSpyModel.capture_index += 1
return 0
return SpyLayer.output
class RNNSpyModel(RecurrentNetwork):
@@ -7,7 +7,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
@@ -15,7 +15,7 @@ class SharedWeightsModel1(TFModelV2):
"""Example of weight sharing between two different TFModelV2s.
Here, we share the variables defined in the 'shared' variable scope
by entering it explicitly with tf.AUTO_REUSE. This creates the
by entering it explicitly with tf1.AUTO_REUSE. This creates the
variables for the 'fc1' layer in a global scope called 'shared'
(outside of the Policy's normal variable scope).
"""
@@ -26,9 +26,9 @@ class SharedWeightsModel1(TFModelV2):
model_config, name)
inputs = tf.keras.layers.Input(observation_space.shape)
with tf.variable_scope(
tf.VariableScope(tf.AUTO_REUSE, "shared"),
reuse=tf.AUTO_REUSE,
with tf1.variable_scope(
tf1.VariableScope(tf1.AUTO_REUSE, "shared"),
reuse=tf1.AUTO_REUSE,
auxiliary_name_scope=False):
last_layer = tf.keras.layers.Dense(
units=64, activation=tf.nn.relu, name="fc1")(inputs)
@@ -60,9 +60,9 @@ class SharedWeightsModel2(TFModelV2):
inputs = tf.keras.layers.Input(observation_space.shape)
# Weights shared with SharedWeightsModel1.
with tf.variable_scope(
tf.VariableScope(tf.AUTO_REUSE, "shared"),
reuse=tf.AUTO_REUSE,
with tf1.variable_scope(
tf1.VariableScope(tf1.AUTO_REUSE, "shared"),
reuse=tf1.AUTO_REUSE,
auxiliary_name_scope=False):
last_layer = tf.keras.layers.Dense(
units=64, activation=tf.nn.relu, name="fc1")(inputs)
+1 -1
View File
@@ -4,7 +4,7 @@ from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFCNet
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
+1 -1
View File
@@ -22,7 +22,7 @@ from ray.rllib.models import ModelCatalog
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_learning_achieved
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
parser = argparse.ArgumentParser()
@@ -20,7 +20,7 @@ from ray.rllib.examples.policy.rock_paper_scissors_dummies import \
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.test_utils import check_learning_achieved
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()
parser = argparse.ArgumentParser()
+13 -13
View File
@@ -5,7 +5,7 @@ from ray.util.debug import log_once
from ray.rllib.utils.debug import summarize
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
# Variable scope in which created variables will be placed under
TOWER_SCOPE_NAME = "tower"
@@ -26,7 +26,7 @@ class LocalSyncParallelOptimizer:
`load_data`, so you can make multiple passes (possibly in randomized order)
over the same data once loaded.
This is similar to tf.train.SyncReplicasOptimizer, but works within a
This is similar to tf1.train.SyncReplicasOptimizer, but works within a
single TensorFlow graph, i.e. implements in-graph replicated training:
https://www.tensorflow.org/api_docs/python/tf/train/SyncReplicasOptimizer
@@ -63,21 +63,21 @@ class LocalSyncParallelOptimizer:
self.build_graph = build_graph
# First initialize the shared loss network
with tf.name_scope(TOWER_SCOPE_NAME):
with tf1.name_scope(TOWER_SCOPE_NAME):
self._shared_loss = build_graph(self.loss_inputs)
shared_ops = tf.get_collection(
tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
shared_ops = tf1.get_collection(
tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name)
# Then setup the per-device loss graphs that use the shared weights
self._batch_index = tf.placeholder(tf.int32, name="batch_index")
self._batch_index = tf1.placeholder(tf.int32, name="batch_index")
# Dynamic batch size, which may be shrunk if there isn't enough data
self._per_device_batch_size = tf.placeholder(
self._per_device_batch_size = tf1.placeholder(
tf.int32, name="per_device_batch_size")
self._loaded_per_device_batch_size = max_per_device_batch_size
# When loading RNN input, we dynamically determine the max seq len
self._max_seq_len = tf.placeholder(tf.int32, name="max_seq_len")
self._max_seq_len = tf1.placeholder(tf.int32, name="max_seq_len")
self._loaded_max_seq_len = 1
# Split on the CPU in case the data doesn't fit in GPU memory.
@@ -103,15 +103,15 @@ class LocalSyncParallelOptimizer:
# gather update ops for any batch norm layers. TODO(ekl) here we will
# use all the ops found which won't work for DQN / DDPG, but those
# aren't supported with multi-gpu right now anyways.
self._update_ops = tf.get_collection(
tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
self._update_ops = tf1.get_collection(
tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name)
for op in shared_ops:
self._update_ops.remove(op) # only care about tower update ops
if self._update_ops:
logger.debug("Update ops to run on apply gradient: {}".format(
self._update_ops))
with tf.control_dependencies(self._update_ops):
with tf1.control_dependencies(self._update_ops):
self._train_op = self.optimizer.apply_gradients(avg)
def load_data(self, sess, inputs, state_inputs):
@@ -265,11 +265,11 @@ class LocalSyncParallelOptimizer:
def _setup_device(self, device, device_input_placeholders, num_data_in):
assert num_data_in <= len(device_input_placeholders)
with tf.device(device):
with tf.name_scope(TOWER_SCOPE_NAME):
with tf1.name_scope(TOWER_SCOPE_NAME):
device_input_batches = []
device_input_slices = []
for i, ph in enumerate(device_input_placeholders):
current_batch = tf.Variable(
current_batch = tf1.Variable(
ph,
trainable=False,
validate_shape=False,
+5 -4
View File
@@ -13,7 +13,7 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.timer import TimerStat
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -84,14 +84,15 @@ class TFMultiGPULearner(LearnerThread):
self.par_opt = []
with self.local_worker.tf_sess.graph.as_default():
with self.local_worker.tf_sess.as_default():
with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
with tf1.variable_scope(
DEFAULT_POLICY_ID, reuse=tf1.AUTO_REUSE):
if self.policy._state_inputs:
rnn_inputs = self.policy._state_inputs + [
self.policy._seq_lens
]
else:
rnn_inputs = []
adam = tf.train.AdamOptimizer(self.lr)
adam = tf1.train.AdamOptimizer(self.lr)
for _ in range(num_data_loader_buffers):
self.par_opt.append(
LocalSyncParallelOptimizer(
@@ -103,7 +104,7 @@ class TFMultiGPULearner(LearnerThread):
self.policy.copy))
self.sess = self.local_worker.tf_sess
self.sess.run(tf.global_variables_initializer())
self.sess.run(tf1.global_variables_initializer())
self.idle_optimizers = queue.Queue()
self.ready_optimizers = queue.Queue()
+3 -3
View File
@@ -20,7 +20,7 @@ from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.sgd import do_minibatch_sgd, averaged
from ray.rllib.utils.types import PolicyID, SampleBatchType
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -137,7 +137,7 @@ class TrainTFMultiGPU:
with self.workers.local_worker().tf_sess.as_default():
for policy_id in self.policies:
policy = self.workers.local_worker().get_policy(policy_id)
with tf.variable_scope(policy_id, reuse=tf.AUTO_REUSE):
with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE):
if policy._state_inputs:
rnn_inputs = policy._state_inputs + [
policy._seq_lens
@@ -152,7 +152,7 @@ class TrainTFMultiGPU:
self.per_device_batch_size, policy.copy))
self.sess = self.workers.local_worker().tf_sess
self.sess.run(tf.global_variables_initializer())
self.sess.run(tf1.global_variables_initializer())
def __call__(self,
samples: SampleBatchType) -> (SampleBatchType, List[dict]):
+2 -2
View File
@@ -27,7 +27,7 @@ from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.spaces.simplex import Simplex
from ray.rllib.utils.spaces.space_utils import flatten_space
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
tree = try_import_tree()
logger = logging.getLogger(__name__)
@@ -257,7 +257,7 @@ class ModelCatalog:
dtype, shape = ModelCatalog.get_action_shape(action_space)
return tf.placeholder(dtype, shape=shape, name=name)
return tf1.placeholder(dtype, shape=shape, name=name)
@staticmethod
@DeveloperAPI
+7 -7
View File
@@ -8,7 +8,7 @@ from ray.rllib.utils.annotations import PublicAPI, DeveloperAPI
from ray.rllib.utils.deprecation import deprecation_warning
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()
logger = logging.getLogger(__name__)
@@ -38,13 +38,13 @@ class Model:
self.action_space = action_space
self.num_outputs = num_outputs
self.options = options
self.scope = tf.get_variable_scope()
self.session = tf.get_default_session()
self.scope = tf1.get_variable_scope()
self.session = tf1.get_default_session()
self.input_dict = input_dict
if seq_lens is not None:
self.seq_lens = seq_lens
else:
self.seq_lens = tf.placeholder(
self.seq_lens = tf1.placeholder(
dtype=tf.int32, shape=[None], name="seq_lens")
self._num_outputs = num_outputs
@@ -68,10 +68,10 @@ class Model:
input_dict["obs"], num_outputs, options)
if options.get("free_log_std", False):
log_std = tf.get_variable(
log_std = tf1.get_variable(
name="log_std",
shape=[num_outputs],
initializer=tf.zeros_initializer)
initializer=tf1.zeros_initializer)
self.outputs = tf.concat(
[self.outputs, 0.0 * self.outputs + log_std], 1)
@@ -196,7 +196,7 @@ class Model:
def flatten(obs, framework):
"""Flatten the given tensor."""
if framework == "tf":
return tf.layers.flatten(obs)
return tf1.layers.flatten(obs)
elif framework == "torch":
assert torch is not None
return torch.flatten(obs, start_dim=1)
+2 -2
View File
@@ -13,7 +13,7 @@ from ray.rllib.utils.framework import try_import_tf, try_import_torch, \
from ray.rllib.utils.spaces.repeated import Repeated
from ray.rllib.utils.types import ModelConfigDict
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()
@@ -339,7 +339,7 @@ class NullContextManager:
def flatten(obs, framework):
"""Flatten the given tensor."""
if framework == "tf":
return tf.layers.flatten(obs)
return tf1.layers.flatten(obs)
elif framework == "torch":
assert torch is not None
return torch.flatten(obs, start_dim=1)
@@ -13,7 +13,7 @@ from ray.rllib.utils.framework import try_import_torch, try_import_tf
from ray.rllib.utils.test_utils import framework_iterator
torch, nn = try_import_torch()
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class TestModules(unittest.TestCase):
@@ -144,7 +144,7 @@ class TestModules(unittest.TestCase):
model = TorchMultiHeadAttention(
in_dim=D_in, out_dim=D_out, num_heads=2, head_dim=32)
self.train_torch_layer(model, x, y)
self.train_torch_layer(model, x, y, num_epochs=500)
else: # framework is tensorflow or tensorflow-eager
@@ -165,7 +165,7 @@ class TestModules(unittest.TestCase):
that it trains in a supervised setting."""
# Checks that torch and tf embedding matrices are the same
with tf.Session().as_default() as sess:
with tf1.Session().as_default() as sess:
assert np.allclose(
relative_position_embedding(20, 15).eval(session=sess),
relative_position_embedding_torch(20, 15).numpy())
+4 -4
View File
@@ -16,7 +16,7 @@ from ray.rllib.utils.numpy import MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT, \
softmax, SMALL_NUMBER, LARGE_INTEGER
from ray.rllib.utils.test_utils import check, framework_iterator
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()
tree = try_import_tree()
@@ -75,13 +75,13 @@ class TestDistributions(unittest.TestCase):
def test_categorical(self):
"""Tests the Categorical ActionDistribution (tf only)."""
num_samples = 100000
logits = tf.placeholder(tf.float32, shape=(None, 10))
logits = tf1.placeholder(tf.float32, shape=(None, 10))
z = 8 * (np.random.rand(10) - 0.5)
data = np.tile(z, (num_samples, 1))
c = Categorical(logits, {}) # dummy config dict
sample_op = c.sample()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess = tf1.Session()
sess.run(tf1.global_variables_initializer())
samples = sess.run(sample_op, feed_dict={logits: data})
counts = np.zeros(10)
for sample in samples:
+1 -1
View File
@@ -17,7 +17,7 @@ from ray.rllib.models.tf.recurrent_net import RecurrentNetwork
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
# TODO(sven): Use RLlib's FCNet instead.
+1 -1
View File
@@ -4,7 +4,7 @@ from ray.rllib.models.tf.misc import normc_initializer
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.utils.framework import get_activation_fn, try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class FullyConnectedNetwork(TFModelV2):
+6 -6
View File
@@ -4,7 +4,7 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.deprecation import deprecation_warning
from ray.rllib.utils.framework import get_activation_fn, try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
# Deprecated: see as an alternative models/tf.fcnet.py
@@ -29,15 +29,15 @@ class FullyConnectedNetwork(Model):
activation = get_activation_fn(options.get("fcnet_activation"))
if len(inputs.shape) > 2:
inputs = tf.layers.flatten(inputs)
inputs = tf1.layers.flatten(inputs)
with tf.name_scope("fc_net"):
with tf1.name_scope("fc_net"):
i = 1
last_layer = inputs
for size in hiddens:
# skip final linear layer
if options.get("no_final_linear") and i == len(hiddens):
output = tf.layers.dense(
output = tf1.layers.dense(
last_layer,
num_outputs,
kernel_initializer=normc_initializer(1.0),
@@ -46,7 +46,7 @@ class FullyConnectedNetwork(Model):
return output, output
label = "fc{}".format(i)
last_layer = tf.layers.dense(
last_layer = tf1.layers.dense(
last_layer,
size,
kernel_initializer=normc_initializer(1.0),
@@ -54,7 +54,7 @@ class FullyConnectedNetwork(Model):
name=label)
i += 1
output = tf.layers.dense(
output = tf1.layers.dense(
last_layer,
num_outputs,
kernel_initializer=normc_initializer(0.01),
+1 -1
View File
@@ -1,6 +1,6 @@
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class GRUGate(tf.keras.layers.Layer):
@@ -5,7 +5,7 @@
"""
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class MultiHeadAttention(tf.keras.layers.Layer):
+1 -1
View File
@@ -3,7 +3,7 @@ import numpy as np
from ray.rllib.utils.framework import get_activation_fn, get_variable, \
try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class NoisyLayer(tf.keras.layers.Layer):
@@ -1,6 +1,6 @@
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class RelativeMultiHeadAttention(tf.keras.layers.Layer):
+1 -1
View File
@@ -1,6 +1,6 @@
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class SkipConnection(tf.keras.layers.Layer):
+6 -6
View File
@@ -7,7 +7,7 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.deprecation import deprecation_warning
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
# Deprecated: see as an alternative models/tf/recurrent_net.py
@@ -45,7 +45,7 @@ class LSTM(Model):
last_layer = add_time_dimension(features, self.seq_lens)
# Setup the LSTM cell
lstm = tf.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True)
lstm = tf1.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True)
self.state_init = [
np.zeros(lstm.state_size.c, np.float32),
np.zeros(lstm.state_size.h, np.float32)
@@ -55,15 +55,15 @@ class LSTM(Model):
if self.state_in:
c_in, h_in = self.state_in
else:
c_in = tf.placeholder(
c_in = tf1.placeholder(
tf.float32, [None, lstm.state_size.c], name="c")
h_in = tf.placeholder(
h_in = tf1.placeholder(
tf.float32, [None, lstm.state_size.h], name="h")
self.state_in = [c_in, h_in]
# Setup LSTM outputs
state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
lstm_out, lstm_state = tf.nn.dynamic_rnn(
state_in = tf1.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
lstm_out, lstm_state = tf1.nn.dynamic_rnn(
lstm,
last_layer,
initial_state=state_in,
+10 -10
View File
@@ -1,7 +1,7 @@
import numpy as np
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
def normc_initializer(std=1.0):
@@ -24,7 +24,7 @@ def conv2d(x,
if dtype is None:
dtype = tf.float32
with tf.variable_scope(name):
with tf1.variable_scope(name):
stride_shape = [1, stride[0], stride[1], 1]
filter_shape = [
filter_size[0], filter_size[1],
@@ -40,24 +40,24 @@ def conv2d(x,
# Initialize weights with random weights.
w_bound = np.sqrt(6 / (fan_in + fan_out))
w = tf.get_variable(
w = tf1.get_variable(
"W",
filter_shape,
dtype,
tf.random_uniform_initializer(-w_bound, w_bound),
tf1.random_uniform_initializer(-w_bound, w_bound),
collections=collections)
b = tf.get_variable(
b = tf1.get_variable(
"b", [1, 1, 1, num_filters],
initializer=tf.constant_initializer(0.0),
initializer=tf1.constant_initializer(0.0),
collections=collections)
return tf.nn.conv2d(x, w, stride_shape, pad) + b
return tf1.nn.conv2d(x, w, stride_shape, pad) + b
def linear(x, size, name, initializer=None, bias_init=0):
w = tf.get_variable(
w = tf1.get_variable(
name + "/w", [x.get_shape()[1], size], initializer=initializer)
b = tf.get_variable(
name + "/b", [size], initializer=tf.constant_initializer(bias_init))
b = tf1.get_variable(
name + "/b", [size], initializer=tf1.constant_initializer(bias_init))
return tf.matmul(x, w) + b
+9 -8
View File
@@ -9,7 +9,7 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.tf_ops import scope_vars
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -47,7 +47,7 @@ def make_v1_wrapper(legacy_model_cls):
# Tracks update ops
self._update_ops = None
with tf.variable_scope(self.name) as scope:
with tf1.variable_scope(self.name) as scope:
self.variable_scope = scope
@override(ModelV2)
@@ -58,20 +58,20 @@ def make_v1_wrapper(legacy_model_cls):
def __call__(self, input_dict, state, seq_lens):
if self.cur_instance:
# create a weight-sharing model copy
with tf.variable_scope(self.cur_instance.scope, reuse=True):
with tf1.variable_scope(self.cur_instance.scope, reuse=True):
new_instance = self.legacy_model_cls(
input_dict, self.obs_space, self.action_space,
self.num_outputs, self.model_config, state, seq_lens)
else:
# create a new model instance
with tf.variable_scope(self.name):
with tf1.variable_scope(self.name):
prev_update_ops = set(
tf.get_collection(tf.GraphKeys.UPDATE_OPS))
tf1.get_collection(tf1.GraphKeys.UPDATE_OPS))
new_instance = self.legacy_model_cls(
input_dict, self.obs_space, self.action_space,
self.num_outputs, self.model_config, state, seq_lens)
self._update_ops = list(
set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) -
set(tf1.get_collection(tf1.GraphKeys.UPDATE_OPS)) -
prev_update_ops)
if len(new_instance.state_init) != len(self.get_initial_state()):
raise ValueError(
@@ -112,8 +112,9 @@ def make_v1_wrapper(legacy_model_cls):
def value_function(self):
assert self.cur_instance is not None, "must call forward first"
with tf.variable_scope(self.variable_scope):
with tf.variable_scope("value_function", reuse=tf.AUTO_REUSE):
with tf1.variable_scope(self.variable_scope):
with tf1.variable_scope(
"value_function", reuse=tf1.AUTO_REUSE):
# Simple case: sharing the feature layer
if self.model_config["vf_share_layers"]:
return tf.reshape(
+12 -13
View File
@@ -7,7 +7,7 @@ from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.annotations import override, DeveloperAPI
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
@DeveloperAPI
@@ -160,18 +160,17 @@ class LSTMWrapper(RecurrentNetwork):
# Concat. prev-action/reward if required.
if self.model_config["lstm_use_prev_action_reward"]:
if self.model_config["lstm_use_prev_action_reward"]:
wrapped_out = tf.concat(
[
wrapped_out,
tf.reshape(
tf.cast(input_dict[SampleBatch.PREV_ACTIONS],
tf.float32), [-1, self.action_dim]),
tf.reshape(
tf.cast(input_dict[SampleBatch.PREV_REWARDS],
tf.float32), [-1, 1]),
],
axis=1)
wrapped_out = tf.concat(
[
wrapped_out,
tf.reshape(
tf.cast(input_dict[SampleBatch.PREV_ACTIONS],
tf.float32), [-1, self.action_dim]),
tf.reshape(
tf.cast(input_dict[SampleBatch.PREV_REWARDS],
tf.float32), [-1, 1]),
],
axis=1)
# Then through our LSTM.
input_dict["obs_flat"] = wrapped_out
+2 -2
View File
@@ -9,7 +9,7 @@ from ray.rllib.utils.annotations import override, DeveloperAPI
from ray.rllib.utils.framework import try_import_tf, try_import_tfp
from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
tfp = try_import_tfp()
tree = try_import_tree()
@@ -85,7 +85,7 @@ class Categorical(TFActionDistribution):
@override(TFActionDistribution)
def _build_sample_op(self):
return tf.squeeze(tf.multinomial(self.inputs, 1), axis=1)
return tf.squeeze(tf.random.categorical(self.inputs, 1), axis=1)
@staticmethod
@override(ActionDistribution)
+3 -3
View File
@@ -2,7 +2,7 @@ from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.utils.annotations import override, PublicAPI
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
@PublicAPI
@@ -39,10 +39,10 @@ class TFModelV2(ModelV2):
name,
framework="tf")
self.var_list = []
if tf.executing_eagerly():
if tf1.executing_eagerly():
self.graph = None
else:
self.graph = tf.get_default_graph()
self.graph = tf1.get_default_graph()
def context(self):
"""Returns a contextmanager for the current TF graph."""
+1 -1
View File
@@ -3,7 +3,7 @@ from ray.rllib.models.tf.visionnet_v1 import _get_filter_config
from ray.rllib.models.tf.misc import normc_initializer
from ray.rllib.utils.framework import get_activation_fn, try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class VisionNetwork(TFModelV2):
+6 -6
View File
@@ -4,7 +4,7 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.deprecation import deprecation_warning
from ray.rllib.utils.framework import get_activation_fn, try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
# Deprecated: see as an alternative models/tf.visionnet.py
@@ -24,9 +24,9 @@ class VisionNetwork(Model):
activation = get_activation_fn(options.get("conv_activation"))
with tf.name_scope("vision_net"):
with tf1.name_scope("vision_net"):
for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
inputs = tf.layers.conv2d(
inputs = tf1.layers.conv2d(
inputs,
out_size,
kernel,
@@ -38,7 +38,7 @@ class VisionNetwork(Model):
# skip final linear layer
if options.get("no_final_linear"):
fc_out = tf.layers.conv2d(
fc_out = tf1.layers.conv2d(
inputs,
num_outputs,
kernel,
@@ -48,7 +48,7 @@ class VisionNetwork(Model):
name="fc_out")
return flatten(fc_out), flatten(fc_out)
fc1 = tf.layers.conv2d(
fc1 = tf1.layers.conv2d(
inputs,
out_size,
kernel,
@@ -56,7 +56,7 @@ class VisionNetwork(Model):
activation=activation,
padding="valid",
name="fc1")
fc2 = tf.layers.conv2d(
fc2 = tf1.layers.conv2d(
fc1,
num_outputs, [1, 1],
activation=None,
+4 -4
View File
@@ -6,7 +6,7 @@ from ray.rllib.policy.sample_batch import MultiAgentBatch
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -75,7 +75,7 @@ class InputReader:
k: (-1, ) + s[1:]
for (k, s) in [(k, batch[k].shape) for k in keys]
}
queue = tf.FIFOQueue(capacity=queue_size, dtypes=dtypes, names=keys)
queue = tf1.FIFOQueue(capacity=queue_size, dtypes=dtypes, names=keys)
tensors = queue.dequeue()
logger.info("Creating TF queue runner for {}".format(self))
@@ -92,12 +92,12 @@ class _QueueRunner(threading.Thread):
def __init__(self, input_reader, queue, keys, dtypes):
threading.Thread.__init__(self)
self.sess = tf.get_default_session()
self.sess = tf1.get_default_session()
self.daemon = True
self.input_reader = input_reader
self.keys = keys
self.queue = queue
self.placeholders = [tf.placeholder(dtype) for dtype in dtypes]
self.placeholders = [tf1.placeholder(dtype) for dtype in dtypes]
self.enqueue_op = queue.enqueue(dict(zip(keys, self.placeholders)))
def enqueue(self, batch):
+2 -2
View File
@@ -45,7 +45,7 @@ class JsonReader(InputReader):
logger.warning(
"Treating input directory as glob pattern: {}".format(
inputs))
if urlparse(inputs).scheme not in ["d", ""]:
if urlparse(inputs).scheme not in ["", "c"]:
raise ValueError(
"Don't know how to glob over `{}`, ".format(inputs) +
"please specify a list of files to read instead.")
@@ -123,7 +123,7 @@ class JsonReader(InputReader):
def _next_file(self):
path = random.choice(self.files)
if urlparse(path).scheme:
if urlparse(path).scheme not in ["", "c"]:
if smart_open is None:
raise ValueError(
"You must install the `smart_open` module to read "
+1 -1
View File
@@ -42,7 +42,7 @@ class JsonWriter(OutputWriter):
self.ioctx = ioctx or IOContext()
self.max_file_size = max_file_size
self.compress_columns = compress_columns
if urlparse(path).scheme:
if urlparse(path).scheme not in ["", "c"]:
self.path_is_uri = True
else:
path = os.path.abspath(os.path.expanduser(path))
+5 -4
View File
@@ -15,7 +15,7 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.timer import TimerStat
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -86,14 +86,15 @@ class TFMultiGPULearner(LearnerThread):
self.par_opt = []
with self.local_worker.tf_sess.graph.as_default():
with self.local_worker.tf_sess.as_default():
with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
with tf1.variable_scope(
DEFAULT_POLICY_ID, reuse=tf1.AUTO_REUSE):
if self.policy._state_inputs:
rnn_inputs = self.policy._state_inputs + [
self.policy._seq_lens
]
else:
rnn_inputs = []
adam = tf.train.AdamOptimizer(self.lr)
adam = tf1.train.AdamOptimizer(self.lr)
for _ in range(num_data_loader_buffers):
self.par_opt.append(
LocalSyncParallelOptimizer(
@@ -105,7 +106,7 @@ class TFMultiGPULearner(LearnerThread):
self.policy.copy))
self.sess = self.local_worker.tf_sess
self.sess.run(tf.global_variables_initializer())
self.sess.run(tf1.global_variables_initializer())
self.idle_optimizers = queue.Queue()
self.ready_optimizers = queue.Queue()
+12 -12
View File
@@ -5,7 +5,7 @@ from ray.util.debug import log_once
from ray.rllib.utils.debug import summarize
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
# Variable scope in which created variables will be placed under
TOWER_SCOPE_NAME = "tower"
@@ -63,21 +63,21 @@ class LocalSyncParallelOptimizer:
self.build_graph = build_graph
# First initialize the shared loss network
with tf.name_scope(TOWER_SCOPE_NAME):
with tf1.name_scope(TOWER_SCOPE_NAME):
self._shared_loss = build_graph(self.loss_inputs)
shared_ops = tf.get_collection(
tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
shared_ops = tf1.get_collection(
tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name)
# Then setup the per-device loss graphs that use the shared weights
self._batch_index = tf.placeholder(tf.int32, name="batch_index")
self._batch_index = tf1.placeholder(tf.int32, name="batch_index")
# Dynamic batch size, which may be shrunk if there isn't enough data
self._per_device_batch_size = tf.placeholder(
self._per_device_batch_size = tf1.placeholder(
tf.int32, name="per_device_batch_size")
self._loaded_per_device_batch_size = max_per_device_batch_size
# When loading RNN input, we dynamically determine the max seq len
self._max_seq_len = tf.placeholder(tf.int32, name="max_seq_len")
self._max_seq_len = tf1.placeholder(tf.int32, name="max_seq_len")
self._loaded_max_seq_len = 1
# Split on the CPU in case the data doesn't fit in GPU memory.
@@ -103,15 +103,15 @@ class LocalSyncParallelOptimizer:
# gather update ops for any batch norm layers. TODO(ekl) here we will
# use all the ops found which won't work for DQN / DDPG, but those
# aren't supported with multi-gpu right now anyways.
self._update_ops = tf.get_collection(
tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
self._update_ops = tf1.get_collection(
tf1.GraphKeys.UPDATE_OPS, scope=tf1.get_variable_scope().name)
for op in shared_ops:
self._update_ops.remove(op) # only care about tower update ops
if self._update_ops:
logger.debug("Update ops to run on apply gradient: {}".format(
self._update_ops))
with tf.control_dependencies(self._update_ops):
with tf1.control_dependencies(self._update_ops):
self._train_op = self.optimizer.apply_gradients(avg)
def load_data(self, sess, inputs, state_inputs):
@@ -265,11 +265,11 @@ class LocalSyncParallelOptimizer:
def _setup_device(self, device, device_input_placeholders, num_data_in):
assert num_data_in <= len(device_input_placeholders)
with tf.device(device):
with tf.name_scope(TOWER_SCOPE_NAME):
with tf1.name_scope(TOWER_SCOPE_NAME):
device_input_batches = []
device_input_slices = []
for i, ph in enumerate(device_input_placeholders):
current_batch = tf.Variable(
current_batch = tf1.Variable(
ph,
trainable=False,
validate_shape=False,
+3 -3
View File
@@ -16,7 +16,7 @@ from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.sgd import averaged
from ray.rllib.utils.timer import TimerStat
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -115,7 +115,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
with self.workers.local_worker().tf_sess.graph.as_default():
with self.workers.local_worker().tf_sess.as_default():
for policy_id, policy in self.policies.items():
with tf.variable_scope(policy_id, reuse=tf.AUTO_REUSE):
with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE):
if policy._state_inputs:
rnn_inputs = policy._state_inputs + [
policy._seq_lens
@@ -130,7 +130,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
self.per_device_batch_size, policy.copy))
self.sess = self.workers.local_worker().tf_sess
self.sess.run(tf.global_variables_initializer())
self.sess.run(tf1.global_variables_initializer())
@override(PolicyOptimizer)
def step(self):
+2 -2
View File
@@ -14,7 +14,7 @@ from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.tests.mock_worker import _MockWorker
from ray.rllib.utils.framework import try_import_tf
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
class LRScheduleTest(unittest.TestCase):
@@ -250,7 +250,7 @@ class AsyncSamplesOptimizerTest(unittest.TestCase):
def _make_envs(self):
def make_sess():
return tf.Session(config=tf.ConfigProto(device_count={"CPU": 2}))
return tf1.Session(config=tf1.ConfigProto(device_count={"CPU": 2}))
local = RolloutWorker(
env_creator=lambda _: gym.make("CartPole-v0"),
+11 -11
View File
@@ -14,7 +14,7 @@ from ray.rllib.utils.debug import summarize
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.tracking_dict import UsageTrackingDict
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -116,7 +116,7 @@ class DynamicTFPolicy(TFPolicy):
explore = existing_inputs["is_exploring"]
timestep = existing_inputs["timestep"]
else:
obs = tf.placeholder(
obs = tf1.placeholder(
tf.float32,
shape=[None] + list(obs_space.shape),
name="observation")
@@ -124,11 +124,11 @@ class DynamicTFPolicy(TFPolicy):
if self._obs_include_prev_action_reward:
prev_actions = ModelCatalog.get_action_placeholder(
action_space, "prev_action")
prev_rewards = tf.placeholder(
prev_rewards = tf1.placeholder(
tf.float32, [None], name="prev_reward")
explore = tf.placeholder_with_default(
explore = tf1.placeholder_with_default(
True, (), name="is_exploring")
timestep = tf.placeholder(tf.int32, (), name="timestep")
timestep = tf1.placeholder(tf.int32, (), name="timestep")
self._input_dict = {
SampleBatch.CUR_OBS: obs,
@@ -137,7 +137,7 @@ class DynamicTFPolicy(TFPolicy):
"is_training": self._get_is_training_placeholder(),
}
# Placeholder for RNN time-chunk valid lengths.
self._seq_lens = tf.placeholder(
self._seq_lens = tf1.placeholder(
dtype=tf.int32, shape=[None], name="seq_lens")
dist_class = dist_inputs = None
@@ -176,7 +176,7 @@ class DynamicTFPolicy(TFPolicy):
self._seq_lens = existing_inputs["seq_lens"]
else:
self._state_in = [
tf.placeholder(shape=(None, ) + s.shape, dtype=s.dtype)
tf1.placeholder(shape=(None, ) + s.shape, dtype=s.dtype)
for s in self.model.get_initial_state()
]
@@ -223,7 +223,7 @@ class DynamicTFPolicy(TFPolicy):
explore=explore)
# Phase 1 init.
sess = tf.get_default_session() or tf.Session()
sess = tf1.get_default_session() or tf1.Session()
if get_batch_divisibility_req:
batch_divisibility_req = get_batch_divisibility_req(self)
else:
@@ -343,7 +343,7 @@ class DynamicTFPolicy(TFPolicy):
dummy_batch[k] = fake_array(v)
# postprocessing might depend on variable init, so run it first here
self._sess.run(tf.global_variables_initializer())
self._sess.run(tf1.global_variables_initializer())
postprocessed_batch = self.postprocess_trajectory(
SampleBatch(dummy_batch))
@@ -380,7 +380,7 @@ class DynamicTFPolicy(TFPolicy):
continue
shape = (None, ) + v.shape[1:]
dtype = np.float32 if v.dtype == np.float64 else v.dtype
placeholder = tf.placeholder(dtype, shape=shape, name=k)
placeholder = tf1.placeholder(dtype, shape=shape, name=k)
train_batch[k] = placeholder
for i, si in enumerate(self._state_in):
@@ -402,7 +402,7 @@ class DynamicTFPolicy(TFPolicy):
if self._grad_stats_fn:
self._stats_fetches.update(
self._grad_stats_fn(self, train_batch, self._grads))
self._sess.run(tf.global_variables_initializer())
self._sess.run(tf1.global_variables_initializer())
def _do_loss_init(self, train_batch):
loss = self._loss_fn(self, self.model, self.dist_class, train_batch)
+5 -6
View File
@@ -16,7 +16,7 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.spaces.space_utils import flatten_to_single_ndarray
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
logger = logging.getLogger(__name__)
@@ -239,7 +239,7 @@ def build_eager_tf_policy(name,
)
self.exploration = self._create_exploration()
self._state_in = [
tf.convert_to_tensor(np.array([s]))
tf.convert_to_tensor([s])
for s in self.model.get_initial_state()
]
input_dict = {
@@ -266,7 +266,7 @@ def build_eager_tf_policy(name,
if optimizer_fn:
self._optimizer = optimizer_fn(self, config)
else:
self._optimizer = tf.train.AdamOptimizer(config["lr"])
self._optimizer = tf1.train.AdamOptimizer(config["lr"])
if after_init:
after_init(self, observation_space, action_space, config)
@@ -618,8 +618,7 @@ def build_eager_tf_policy(name,
SampleBatch.DONES: np.array([False], dtype=np.bool),
SampleBatch.REWARDS: np.array([0], dtype=np.float32),
}
if isinstance(self.action_space, Tuple) or isinstance(
self.action_space, Dict):
if isinstance(self.action_space, (Dict, Tuple)):
dummy_batch[SampleBatch.ACTIONS] = [
flatten_to_single_ndarray(self.action_space.sample())
]
@@ -640,7 +639,7 @@ def build_eager_tf_policy(name,
dummy_batch["seq_lens"] = np.array([1], dtype=np.int32)
# Convert everything to tensors.
dummy_batch = tf.nest.map_structure(tf.convert_to_tensor,
dummy_batch = tf.nest.map_structure(tf1.convert_to_tensor,
dummy_batch)
# for IMPALA which expects a certain sample batch size.
+2 -2
View File
@@ -20,7 +20,7 @@ from ray.rllib.utils.annotations import DeveloperAPI
from ray.rllib.utils.debug import summarize
from ray.rllib.utils.framework import try_import_tf, try_import_torch
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()
logger = logging.getLogger(__name__)
@@ -203,7 +203,7 @@ def chop_into_sequences(episode_ids,
seq_len = 0
unique_ids = np.add(
np.add(episode_ids, agent_indices),
np.array(unroll_ids) << 32)
np.array(unroll_ids, dtype=np.int64) << 32)
for uid in unique_ids:
if (prev_id is not None and uid != prev_id) or \
seq_len >= max_seq_len:
@@ -11,7 +11,7 @@ from ray.rllib.utils.test_utils import check, framework_iterator
from ray.rllib.utils.numpy import one_hot, fc, MIN_LOG_NN_OUTPUT, \
MAX_LOG_NN_OUTPUT
tf = try_import_tf()
tf1, tf, tfv = try_import_tf()
def do_test_log_likelihood(run,

Some files were not shown because too many files have changed in this diff Show More