[RLlib] Tf2.x native. (#8752)

This commit is contained in:
Sven Mika
2020-07-11 22:06:35 +02:00
committed by GitHub
parent 5c853eaa6a
commit fcdf410ae1
45 changed files with 359 additions and 202 deletions
+6 -6
View File
@@ -42,7 +42,7 @@
# A2C/A3C
py_test(
name = "regression_test_a2c_cartpole_tf",
name = "run_regression_tests_cartpole_a2c_tf",
main = "tests/run_regression_tests.py",
tags = ["learning_tests_tf", "learning_tests_cartpole"],
size = "medium",
@@ -52,7 +52,7 @@ py_test(
)
py_test(
name = "regression_test_a2c_cartpole_torch",
name = "run_regression_tests_cartpole_a2c_torch",
main = "tests/run_regression_tests.py",
tags = ["learning_tests_torch", "learning_tests_cartpole"],
size = "medium",
@@ -62,7 +62,7 @@ py_test(
)
py_test(
name = "regression_test_a3c_cartpole_tf",
name = "run_regression_tests_cartpole_a3c_tf",
main = "tests/run_regression_tests.py",
tags = ["learning_tests_tf", "learning_tests_cartpole"],
size = "medium",
@@ -72,7 +72,7 @@ py_test(
)
py_test(
name = "regression_test_a3c_cartpole_torch",
name = "run_regression_tests_cartpole_a3c_torch",
main = "tests/run_regression_tests.py",
tags = ["learning_tests_torch", "learning_tests_cartpole"],
size = "medium",
@@ -401,7 +401,7 @@ py_test(
py_test(
name = "test_apex_ddpg",
tags = ["agents_dir"],
size = "small",
size = "medium",
srcs = ["agents/ddpg/tests/test_apex_ddpg.py"]
)
@@ -455,7 +455,7 @@ py_test(
py_test(
name = "test_impala",
tags = ["agents_dir"],
size = "medium",
size = "large",
srcs = ["agents/impala/tests/test_impala.py"]
)
py_test(
+1 -1
View File
@@ -25,7 +25,7 @@ class TestA2C(unittest.TestCase):
# Test against all frameworks.
for fw in framework_iterator(config):
config["sample_async"] = fw in ["tf", "tfe"]
config["sample_async"] = fw in ["tf", "tfe", "tf2"]
for env in ["PongDeterministic-v0"]:
trainer = a3c.A2CTrainer(config=config, env=env)
for i in range(num_iterations):
+4 -4
View File
@@ -236,7 +236,7 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
def make_ddpg_optimizers(policy, config):
# Create separate optimizers for actor & critic losses.
if tfv == 2 and config["framework"] == "tfe":
if policy.config["framework"] in ["tf2", "tfe"]:
policy._actor_optimizer = tf.keras.optimizers.Adam(
learning_rate=config["actor_lr"])
policy._critic_optimizer = tf.keras.optimizers.Adam(
@@ -266,7 +266,7 @@ def build_apply_op(policy, optimizer, grads_and_vars):
critic_op = policy._critic_optimizer.apply_gradients(
policy._critic_grads_and_vars)
# Increment global step & apply ops.
if tfv == 2 and policy.config["framework"] == "tfe":
if policy.config["framework"] in ["tf2", "tfe"]:
policy.global_step.assign_add(1)
return tf.no_op()
else:
@@ -275,7 +275,7 @@ def build_apply_op(policy, optimizer, grads_and_vars):
def gradients_fn(policy, optimizer, loss):
if policy.config["framework"] == "tfe":
if policy.config["framework"] in ["tf2", "tfe"]:
tape = optimizer.tape
pol_weights = policy.model.policy_variables()
actor_grads_and_vars = list(zip(tape.gradient(
@@ -318,7 +318,7 @@ def build_ddpg_stats(policy, batch):
def before_init_fn(policy, obs_space, action_space, config):
# Create global step for counting the number of update operations.
if tfv == 2 and config["framework"] == "tfe":
if config["framework"] in ["tf2", "tfe"]:
policy.global_step = get_variable(0, tf_name="global_step")
else:
policy.global_step = tf1.train.get_or_create_global_step()
+1 -1
View File
@@ -24,7 +24,7 @@ class TestApexDDPG(unittest.TestCase):
config["learning_starts"] = 0
config["optimizer"]["num_replay_buffer_shards"] = 1
num_iterations = 1
for _ in framework_iterator(config, frameworks=("tf", "torch")):
for _ in framework_iterator(config):
plain_config = config.copy()
trainer = apex_ddpg.ApexDDPGTrainer(
config=plain_config, env="Pendulum-v0")
+5 -3
View File
@@ -9,12 +9,13 @@ from ray.rllib.agents.ddpg.ddpg_torch_policy import ddpg_actor_critic_loss as \
from ray.rllib.agents.sac.tests.test_sac import SimpleEnv
from ray.rllib.execution.replay_buffer import LocalReplayBuffer
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.numpy import fc, huber_loss, l2_loss, relu, sigmoid
from ray.rllib.utils.test_utils import check, check_compute_single_action, \
framework_iterator
from ray.rllib.utils.torch_ops import convert_to_torch_tensor
tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()
@@ -404,14 +405,15 @@ class TestDDPG(unittest.TestCase):
policy_t = sigmoid(2.0 * fc(
relu(
fc(model_out_t, weights[ks[1]], weights[ks[0]], framework=fw)),
weights[ks[5]], weights[ks[4]]))
weights[ks[5]], weights[ks[4]], framework=fw))
# Get policy output for t+1 (target model).
policy_tp1 = sigmoid(2.0 * fc(
relu(
fc(target_model_out_tp1,
weights[ks[3]],
weights[ks[2]],
framework=fw)), weights[ks[7]], weights[ks[6]]))
framework=fw)),
weights[ks[7]], weights[ks[6]], framework=fw))
# Assume no smooth target policy.
policy_tp1_smoothed = policy_tp1
View File
+6 -2
View File
@@ -253,8 +253,12 @@ def build_q_losses(policy, model, _, train_batch):
def adam_optimizer(policy, config):
return tf1.train.AdamOptimizer(
learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"])
if policy.config["framework"] in ["tf2", "tfe"]:
return tf.keras.optimizers.Adam(
learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"])
else:
return tf1.train.AdamOptimizer(
learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"])
def clip_gradients(policy, optimizer, loss):
+6
View File
@@ -3,11 +3,14 @@ from six.moves import queue
from ray.rllib.evaluation.metrics import get_learner_stats
from ray.rllib.policy.policy import LEARNER_STATS_KEY
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.timer import TimerStat
from ray.rllib.utils.window_stat import WindowStat
LEARNER_QUEUE_MAX_SIZE = 16
tf1, tf, tfv = try_import_tf()
class LearnerThread(threading.Thread):
"""Background thread that updates the local model from replay data.
@@ -33,6 +36,9 @@ class LearnerThread(threading.Thread):
self.stats = {}
def run(self):
# Switch on eager mode if configured.
if self.local_worker.policy_config.get("framework") in ["tf2", "tfe"]:
tf1.enable_eager_execution()
while not self.stopped:
self.step()
+1 -1
View File
@@ -22,7 +22,7 @@ class TestApexDQN(unittest.TestCase):
config["timesteps_per_iteration"] = 100
config["min_iter_time_s"] = 1
config["optimizer"]["num_replay_buffer_shards"] = 1
for _ in framework_iterator(config, frameworks=("torch", "tf")):
for _ in framework_iterator(config):
trainer = apex.ApexTrainer(config=config, env="CartPole-v0")
trainer.train()
trainer.stop()
+6 -3
View File
@@ -3,12 +3,9 @@ import unittest
import ray
import ray.rllib.agents.dqn as dqn
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check, check_compute_single_action, \
framework_iterator
tf1, tf, tfv = try_import_tf()
class TestDQN(unittest.TestCase):
@classmethod
@@ -34,6 +31,7 @@ class TestDQN(unittest.TestCase):
print(results)
check_compute_single_action(trainer)
trainer.stop()
# Rainbow.
# TODO(sven): Add torch once DQN-torch supports distributional-Q.
@@ -51,6 +49,7 @@ class TestDQN(unittest.TestCase):
print(results)
check_compute_single_action(trainer)
trainer.stop()
def test_dqn_exploration_and_soft_q_config(self):
"""Tests, whether a DQN Agent outputs exploration/softmaxed actions."""
@@ -73,6 +72,7 @@ class TestDQN(unittest.TestCase):
for _ in range(50):
actions.append(trainer.compute_action(obs))
check(np.std(actions), 0.0, false=True)
trainer.stop()
# Low softmax temperature. Behaves like argmax
# (but no epsilon exploration).
@@ -86,6 +86,7 @@ class TestDQN(unittest.TestCase):
for _ in range(50):
actions.append(trainer.compute_action(obs))
check(np.std(actions), 0.0, decimals=3)
trainer.stop()
# Higher softmax temperature.
config["exploration_config"]["temperature"] = 1.0
@@ -104,6 +105,7 @@ class TestDQN(unittest.TestCase):
for _ in range(300):
actions.append(trainer.compute_action(obs))
check(np.std(actions), 0.0, false=True)
trainer.stop()
# With Random exploration.
config["exploration_config"] = {"type": "Random"}
@@ -113,6 +115,7 @@ class TestDQN(unittest.TestCase):
for _ in range(300):
actions.append(trainer.compute_action(obs))
check(np.std(actions), 0.0, false=True)
trainer.stop()
if __name__ == "__main__":
+1 -1
View File
@@ -23,7 +23,7 @@ class TestIMPALA(unittest.TestCase):
config = impala.DEFAULT_CONFIG.copy()
num_iterations = 1
for _ in framework_iterator(config, frameworks=("tf", "torch")):
for _ in framework_iterator(config):
local_cfg = config.copy()
for env in ["Pendulum-v0", "CartPole-v0"]:
print("Env={}".format(env))
+7 -9
View File
@@ -222,12 +222,10 @@ def multi_from_logits(behaviour_policy_logits,
behaviour_policy_logits[i].shape.assert_has_rank(3)
target_policy_logits[i].shape.assert_has_rank(3)
with tf1.name_scope(
name,
values=[
behaviour_policy_logits, target_policy_logits, actions,
discounts, rewards, values, bootstrap_value
]):
with tf1.name_scope(name, values=[
behaviour_policy_logits, target_policy_logits, actions,
discounts, rewards, values, bootstrap_value
]):
target_action_log_probs = multi_log_probs_from_logits_and_actions(
target_policy_logits, actions, dist_class, model)
@@ -332,9 +330,9 @@ def from_importance_weights(log_rhos,
if clip_pg_rho_threshold is not None:
clip_pg_rho_threshold.shape.assert_has_rank(0)
with tf1.name_scope(
name,
values=[log_rhos, discounts, rewards, values, bootstrap_value]):
with tf1.name_scope(name, values=[
log_rhos, discounts, rewards, values, bootstrap_value
]):
rhos = tf.math.exp(log_rhos)
if clip_rho_threshold is not None:
clipped_rhos = tf.minimum(
+12 -4
View File
@@ -253,11 +253,19 @@ def postprocess_trajectory(policy,
def choose_optimizer(policy, config):
if policy.config["opt_type"] == "adam":
return tf1.train.AdamOptimizer(policy.cur_lr)
if policy.config["framework"] in ["tf2", "tfe"]:
return tf.keras.optimizers.Adam(policy.cur_lr)
else:
return tf1.train.AdamOptimizer(policy.cur_lr)
else:
return tf1.train.RMSPropOptimizer(
policy.cur_lr,
config["decay"], config["momentum"], config["epsilon"])
if tfv == 2:
return tf.keras.optimizers.RMSprop(
policy.cur_lr, config["decay"], config["momentum"],
config["epsilon"])
else:
return tf1.train.RMSPropOptimizer(
policy.cur_lr, config["decay"], config["momentum"],
config["epsilon"])
def clip_gradients(policy, optimizer, loss):
+23 -12
View File
@@ -3,7 +3,7 @@ from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.evaluation.postprocessing import compute_advantages, \
Postprocessing
from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.framework import try_import_tf, get_variable
from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
tf1, tf, tfv = try_import_tf()
@@ -36,16 +36,27 @@ class ReweightedImitationLoss:
action_dist, beta):
# advantage estimation
adv = cumulative_rewards - state_values
# update averaged advantage norm
update_adv_norm = tf1.assign_add(
ref=policy._ma_adv_norm,
value=1e-6 * (
if policy.config["framework"] in ["tf2", "tfe"]:
policy._ma_adv_norm.assign_add(
1e-6 * (tf.reduce_mean(
tf.math.square(adv)) - policy._ma_adv_norm))
# Exponentially weighted advantages.
exp_advs = tf.math.exp(
beta * tf.math.divide(
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
else:
update_adv_norm = tf1.assign_add(
ref=policy._ma_adv_norm,
value=1e-6 * (
tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
# exponentially weighted advantages
with tf1.control_dependencies([update_adv_norm]):
exp_advs = tf.math.exp(beta * tf.math.divide(
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
# exponentially weighted advantages
with tf1.control_dependencies([update_adv_norm]):
exp_advs = tf.math.exp(
beta * tf.math.divide(
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
# log\pi_\theta(a|s)
logprobs = action_dist.logp(actions)
@@ -125,10 +136,10 @@ def setup_mixins(policy, obs_space, action_space, config):
ValueNetworkMixin.__init__(policy)
# Set up a tf-var for the moving avg (do this here to make it work with
# eager mode).
policy._ma_adv_norm = tf1.get_variable(
name="moving_average_of_advantage_norm",
dtype=tf.float32,
initializer=100.0,
policy._ma_adv_norm = get_variable(
100.0,
framework="tf",
tf_name="moving_average_of_advantage_norm",
trainable=False)
+1 -1
View File
@@ -27,7 +27,7 @@ def pg_tf_loss(policy, model, dist_class, train_batch):
action_dist = dist_class(logits, model)
return -tf.reduce_mean(
action_dist.logp(train_batch[SampleBatch.ACTIONS]) *
train_batch[Postprocessing.ADVANTAGES])
tf.cast(train_batch[Postprocessing.ADVANTAGES], dtype=tf.float32))
PGTFPolicy = build_tf_policy(
+6 -4
View File
@@ -27,7 +27,7 @@ class TestPG(unittest.TestCase):
for _ in framework_iterator(config):
trainer = pg.PGTrainer(config=config, env="CartPole-v0")
for i in range(num_iterations):
trainer.train()
print(trainer.train())
check_compute_single_action(
trainer, include_prev_action_reward=True)
@@ -56,7 +56,7 @@ class TestPG(unittest.TestCase):
trainer = pg.PGTrainer(config=config, env="CartPole-v0")
policy = trainer.get_policy()
vars = policy.model.trainable_variables()
if fw == "tf":
if sess:
vars = policy.get_session().run(vars)
# Post-process (calculate simple (non-GAE) advantages) and attach
@@ -71,13 +71,15 @@ class TestPG(unittest.TestCase):
check(train_batch[Postprocessing.ADVANTAGES], [2.9701, 1.99, 1.0])
# Actual loss results.
if fw == "tf":
if sess:
results = policy.get_session().run(
policy._loss,
feed_dict=policy._get_loss_inputs_dict(
train_batch, shuffle=False))
else:
results = (pg.pg_tf_loss if fw == "tfe" else pg.pg_torch_loss)(
results = (
pg.pg_tf_loss if fw in ["tf2", "tfe"] else pg.pg_torch_loss
)(
policy,
policy.model,
dist_class=dist_cls,
+3 -2
View File
@@ -134,7 +134,7 @@ def validate_config(config):
"Using the simple minibatch optimizer. This will significantly "
"reduce performance, consider simple_optimizer=False.")
# Multi-gpu not supported for PyTorch and tf-eager.
elif config["framework"] in ["tfe", "torch"]:
elif config["framework"] in ["tf2", "tfe", "torch"]:
config["simple_optimizer"] = True
@@ -192,7 +192,8 @@ def execution_plan(workers, config):
num_envs_per_worker=config["num_envs_per_worker"],
train_batch_size=config["train_batch_size"],
shuffle_sequences=config["shuffle_sequences"],
_fake_gpus=config["_fake_gpus"]))
_fake_gpus=config["_fake_gpus"],
framework=config.get("framework")))
# Update KL after each round of training.
train_op = train_op.for_each(lambda t: t[1]).for_each(UpdateKL(workers))
+5 -7
View File
@@ -7,7 +7,7 @@ from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.tf_policy import LearningRateSchedule, \
EntropyCoeffSchedule
from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.framework import try_import_tf, get_variable
from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable
tf1, tf, tfv = try_import_tf()
@@ -206,12 +206,10 @@ class KLCoeffMixin:
# KL Coefficient
self.kl_coeff_val = config["kl_coeff"]
self.kl_target = config["kl_target"]
self.kl_coeff = tf1.get_variable(
initializer=tf.constant_initializer(self.kl_coeff_val),
name="kl_coeff",
shape=(),
trainable=False,
dtype=tf.float32)
self.kl_coeff = get_variable(
float(self.kl_coeff_val),
tf_name="kl_coeff",
trainable=False)
def update_kl(self, sampled_kl):
if sampled_kl > 2.0 * self.kl_target:
+4 -4
View File
@@ -93,10 +93,10 @@ class TestPPO(unittest.TestCase):
learnt = False
for i in range(num_iterations):
results = trainer.train()
print(results)
if results["episode_reward_mean"] > 150:
learnt = True
break
print(results)
assert learnt, "PPO multi-GPU (with fake-GPUs) did not learn CartPole!"
trainer.stop()
@@ -180,7 +180,7 @@ class TestPPO(unittest.TestCase):
init_std = get_value()
assert init_std == 0.0, init_std
if fw in ["tf", "tfe"]:
if fw in ["tf2", "tf", "tfe"]:
batch = postprocess_ppo_gae_tf(policy, FAKE_BATCH)
else:
batch = postprocess_ppo_gae_torch(policy, FAKE_BATCH)
@@ -222,7 +222,7 @@ class TestPPO(unittest.TestCase):
# to train_batch dict.
# A = [0.99^2 * 0.5 + 0.99 * -1.0 + 1.0, 0.99 * 0.5 - 1.0, 0.5] =
# [0.50005, -0.505, 0.5]
if fw == "tf" or fw == "tfe":
if fw in ["tf2", "tf", "tfe"]:
train_batch = postprocess_ppo_gae_tf(policy, FAKE_BATCH)
else:
train_batch = postprocess_ppo_gae_torch(policy, FAKE_BATCH)
@@ -233,7 +233,7 @@ class TestPPO(unittest.TestCase):
[0.50005, -0.505, 0.5])
# Calculate actual PPO loss.
if fw == "tfe":
if fw in ["tf2", "tfe"]:
ppo_surrogate_loss_tf(policy, policy.model, Categorical,
train_batch)
elif fw == "torch":
+45 -22
View File
@@ -14,7 +14,8 @@ from ray.rllib.models.tf.tf_action_dist import Beta, Categorical, \
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.utils.error import UnsupportedSpaceException
from ray.rllib.utils.framework import try_import_tf, try_import_tfp
from ray.rllib.utils.framework import get_variable, try_import_tf, \
try_import_tfp
tf1, tf, tfv = try_import_tf()
tfp = try_import_tfp()
@@ -277,7 +278,7 @@ def sac_actor_critic_loss(policy, model, _, train_batch):
def gradients_fn(policy, optimizer, loss):
# Eager: Use GradientTape.
if policy.config["framework"] == "tfe":
if policy.config["framework"] in ["tf2", "tfe"]:
tape = optimizer.tape
pol_weights = policy.model.policy_variables()
actor_grads_and_vars = list(zip(tape.gradient(
@@ -355,10 +356,14 @@ def apply_gradients(policy, optimizer, grads_and_vars):
policy._critic_optimizer[0].apply_gradients(cgrads)
]
alpha_apply_ops = policy._alpha_optimizer.apply_gradients(
policy._alpha_grads_and_vars,
global_step=tf1.train.get_or_create_global_step())
return tf.group([actor_apply_ops, alpha_apply_ops] + critic_apply_ops)
if policy.config["framework"] in ["tf2", "tfe"]:
policy._alpha_optimizer.apply_gradients(policy._alpha_grads_and_vars)
return
else:
alpha_apply_ops = policy._alpha_optimizer.apply_gradients(
policy._alpha_grads_and_vars,
global_step=tf1.train.get_or_create_global_step())
return tf.group([actor_apply_ops, alpha_apply_ops] + critic_apply_ops)
def stats(policy, train_batch):
@@ -379,22 +384,40 @@ def stats(policy, train_batch):
class ActorCriticOptimizerMixin:
def __init__(self, config):
# create global step for counting the number of update operations
self.global_step = tf1.train.get_or_create_global_step()
# use separate optimizers for actor & critic
self._actor_optimizer = tf1.train.AdamOptimizer(
learning_rate=config["optimization"]["actor_learning_rate"])
self._critic_optimizer = [
tf1.train.AdamOptimizer(
learning_rate=config["optimization"]["critic_learning_rate"])
]
if config["twin_q"]:
self._critic_optimizer.append(
tf1.train.AdamOptimizer(learning_rate=config["optimization"][
"critic_learning_rate"]))
self._alpha_optimizer = tf1.train.AdamOptimizer(
learning_rate=config["optimization"]["entropy_learning_rate"])
# - Create global step for counting the number of update operations.
# - Use separate optimizers for actor & critic.
if config["framework"] in ["tf2", "tfe"]:
self.global_step = get_variable(0, tf_name="global_step")
self._actor_optimizer = tf.keras.optimizers.Adam(
learning_rate=config["optimization"]["actor_learning_rate"])
self._critic_optimizer = [
tf.keras.optimizers.Adam(
learning_rate=config["optimization"][
"critic_learning_rate"])
]
if config["twin_q"]:
self._critic_optimizer.append(
tf.keras.optimizers.Adam(
learning_rate=config["optimization"][
"critic_learning_rate"]))
self._alpha_optimizer = tf.keras.optimizers.Adam(
learning_rate=config["optimization"]["entropy_learning_rate"])
else:
self.global_step = tf1.train.get_or_create_global_step()
self._actor_optimizer = tf1.train.AdamOptimizer(
learning_rate=config["optimization"]["actor_learning_rate"])
self._critic_optimizer = [
tf1.train.AdamOptimizer(
learning_rate=config["optimization"][
"critic_learning_rate"])
]
if config["twin_q"]:
self._critic_optimizer.append(
tf1.train.AdamOptimizer(
learning_rate=config["optimization"][
"critic_learning_rate"]))
self._alpha_optimizer = tf1.train.AdamOptimizer(
learning_rate=config["optimization"]["entropy_learning_rate"])
def setup_early_mixins(policy, obs_space, action_space, config):
+55 -10
View File
@@ -5,18 +5,20 @@ import re
import unittest
import ray.rllib.agents.sac as sac
from ray.rllib.agents.sac.sac_tf_policy import sac_actor_critic_loss as tf_loss
from ray.rllib.agents.sac.sac_torch_policy import actor_critic_loss as \
loss_torch
from ray.rllib.models.tf.tf_action_dist import SquashedGaussian
from ray.rllib.models.torch.torch_action_dist import TorchSquashedGaussian
from ray.rllib.execution.replay_buffer import LocalReplayBuffer
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.numpy import fc, relu
from ray.rllib.utils.test_utils import check, check_compute_single_action, \
framework_iterator
from ray.rllib.utils.torch_ops import convert_to_torch_tensor
tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()
@@ -166,6 +168,10 @@ class TestSAC(unittest.TestCase):
if weights_dict is None:
assert fw in ["tf", "tfe"] # Start with the tf vars-dict.
weights_dict = policy.get_weights()
if fw == "tfe":
log_alpha = weights_dict[10]
weights_dict = self._translate_tfe_weights(
weights_dict, map_)
else:
assert fw == "torch" # Then transfer that to torch Model.
model_dict = self._translate_weights_to_torch(
@@ -213,6 +219,16 @@ class TestSAC(unittest.TestCase):
tf_a_grads = [g for g, v in tf_a_grads]
tf_e_grads = [g for g, v in tf_e_grads]
elif fw == "tfe":
with tf.GradientTape() as tape:
tf_loss(policy, policy.model, None, input_)
c, a, e, t = policy.critic_loss, policy.actor_loss, \
policy.alpha_loss, policy.td_error
vars = tape.watched_variables()
tf_c_grads = tape.gradient(c[0], vars[6:10])
tf_a_grads = tape.gradient(a, vars[2:6])
tf_e_grads = tape.gradient(e, vars[10])
elif fw == "torch":
loss_torch(policy, policy.model, None, input_)
c, a, e, t = policy.critic_loss, policy.actor_loss, \
@@ -447,15 +463,27 @@ class TestSAC(unittest.TestCase):
# Target q network evaluation.
# target_model.get_q_values
q_tp1 = fc(
relu(
fc(np.concatenate([target_model_out_tp1, policy_tp1], -1),
weights[ks[15]],
weights[ks[14]],
framework=fw)),
weights[ks[17]],
weights[ks[16]],
framework=fw)
if fw == "tf":
q_tp1 = fc(
relu(
fc(np.concatenate([target_model_out_tp1, policy_tp1], -1),
weights[ks[15]],
weights[ks[14]],
framework=fw)),
weights[ks[17]],
weights[ks[16]],
framework=fw)
else:
assert fw == "tfe"
q_tp1 = fc(
relu(
fc(np.concatenate([target_model_out_tp1, policy_tp1], -1),
weights[ks[7]],
weights[ks[6]],
framework=fw)),
weights[ks[9]],
weights[ks[8]],
framework=fw)
q_t_selected = np.squeeze(q_t, axis=-1)
q_tp1 -= alpha * log_pis_tp1
@@ -487,6 +515,23 @@ class TestSAC(unittest.TestCase):
}
return model_dict
def _translate_tfe_weights(self, weights_dict, map_):
model_dict = {
"default_policy/log_alpha": None,
"default_policy/log_alpha_target": None,
"default_policy/sequential/action_1/kernel": weights_dict[2],
"default_policy/sequential/action_1/bias": weights_dict[3],
"default_policy/sequential/action_out/kernel": weights_dict[4],
"default_policy/sequential/action_out/bias": weights_dict[5],
"default_policy/sequential_1/q_hidden_0/kernel": weights_dict[6],
"default_policy/sequential_1/q_hidden_0/bias": weights_dict[7],
"default_policy/sequential_1/q_out/kernel": weights_dict[8],
"default_policy/sequential_1/q_out/bias": weights_dict[9],
"default_policy/value_out/kernel": weights_dict[0],
"default_policy/value_out/bias": weights_dict[1],
}
return model_dict
if __name__ == "__main__":
import pytest
+3 -1
View File
@@ -582,7 +582,9 @@ class Trainer(Trainable):
self.config.pop("eager")
# Enable eager/tracing support.
if tf1 and self.config["framework"] == "tfe":
if tf1 and self.config["framework"] in ["tf2", "tfe"]:
if self.config["framework"] == "tf2" and tfv < 2:
raise ValueError("`framework`=tf2, but tf-version is < 2.0!")
if not tf1.executing_eagerly():
tf1.enable_eager_execution()
logger.info("Executing eagerly, with eager_tracing={}".format(
+2 -2
View File
@@ -283,7 +283,7 @@ class RolloutWorker(ParallelIteratorWorker):
ParallelIteratorWorker.__init__(self, gen_rollouts, False)
policy_config: TrainerConfigDict = policy_config or {}
if (tf1 and policy_config.get("framework") == "tfe"
if (tf1 and policy_config.get("framework") in ["tf2", "tfe"]
and not policy_config.get("no_eager_on_workers")
# This eager check is necessary for certain all-framework tests
# that use tf's eager_mode() context generator.
@@ -959,7 +959,7 @@ class RolloutWorker(ParallelIteratorWorker):
if tf1 and tf1.executing_eagerly():
if hasattr(cls, "as_eager"):
cls = cls.as_eager()
if policy_config["eager_tracing"]:
if policy_config.get("eager_tracing"):
cls = cls.with_tracing()
elif not issubclass(cls, TFPolicy):
pass # could be some other type of policy
+6
View File
@@ -5,9 +5,12 @@ from six.moves import queue
from ray.rllib.evaluation.metrics import get_learner_stats
from ray.rllib.execution.minibatch_buffer import MinibatchBuffer
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.timer import TimerStat
from ray.rllib.utils.window_stat import WindowStat
tf1, tf, tfv = try_import_tf()
class LearnerThread(threading.Thread):
"""Background thread that updates the local model from sample trajectories.
@@ -55,6 +58,9 @@ class LearnerThread(threading.Thread):
self.num_steps = 0
def run(self):
# Switch on eager mode if configured.
if self.local_worker.policy_config.get("framework") in ["tf2", "tfe"]:
tf1.enable_eager_execution()
while not self.stopped:
self.step()
+13 -7
View File
@@ -107,12 +107,14 @@ class TrainTFMultiGPU:
train_batch_size: int,
shuffle_sequences: bool,
policies: List[PolicyID] = frozenset([]),
_fake_gpus: bool = False):
_fake_gpus: bool = False,
framework: str = "tf"):
self.workers = workers
self.policies = policies or workers.local_worker().policies_to_train
self.num_sgd_iter = num_sgd_iter
self.sgd_minibatch_size = sgd_minibatch_size
self.shuffle_sequences = shuffle_sequences
self.framework = framework
# Collect actual devices to use.
if not num_gpus:
@@ -136,8 +138,10 @@ class TrainTFMultiGPU:
with self.workers.local_worker().tf_sess.graph.as_default():
with self.workers.local_worker().tf_sess.as_default():
for policy_id in self.policies:
policy = self.workers.local_worker().get_policy(policy_id)
with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE):
policy = self.workers.local_worker().get_policy(
policy_id)
with tf1.variable_scope(
policy_id, reuse=tf1.AUTO_REUSE):
if policy._state_inputs:
rnn_inputs = policy._state_inputs + [
policy._seq_lens
@@ -146,10 +150,12 @@ class TrainTFMultiGPU:
rnn_inputs = []
self.optimizers[policy_id] = (
LocalSyncParallelOptimizer(
policy._optimizer, self.devices,
[v
for _, v in policy._loss_inputs], rnn_inputs,
self.per_device_batch_size, policy.copy))
policy._optimizer,
self.devices,
[v for _, v in policy._loss_inputs],
rnn_inputs,
self.per_device_batch_size,
policy.copy))
self.sess = self.workers.local_worker().tf_sess
self.sess.run(tf1.global_variables_initializer())
+1 -1
View File
@@ -370,7 +370,7 @@ class ModelCatalog:
"used, however you specified a custom model {}".format(
model_cls))
if framework in ["tf", "tfe"]:
if framework in ["tf", "tfe", "tf2"]:
v2_class = None
# Try to get a default v2 model.
if not model_config.get("custom_model"):
+1
View File
@@ -57,6 +57,7 @@ class Preprocessor:
observation = np.array(observation)
try:
if not self._obs_space.contains(observation):
print()
raise ValueError(
"Observation outside expected value range",
self._obs_space, observation)
+1 -1
View File
@@ -266,7 +266,7 @@ def build_eager_tf_policy(name,
if optimizer_fn:
self._optimizer = optimizer_fn(self, config)
else:
self._optimizer = tf1.train.AdamOptimizer(config["lr"])
self._optimizer = tf.keras.optimizers.Adam(config["lr"])
if after_init:
after_init(self, observation_space, action_space, config)
@@ -1,35 +0,0 @@
import numpy as np
import random
from ray.rllib.policy.policy import Policy
from ray.rllib.utils.annotations import override
class TestPolicy(Policy):
"""A dummy Policy that returns a random (batched) int for compute_actions.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.exploration = self._create_exploration()
@override(Policy)
def compute_actions(self,
obs_batch,
state_batches=None,
prev_action_batch=None,
prev_reward_batch=None,
episodes=None,
explore=None,
timestep=None,
**kwargs):
return np.array([random.choice([0, 1])] * len(obs_batch)), [], {}
@override(Policy)
def compute_log_likelihoods(self,
actions,
obs_batch,
state_batches=None,
prev_action_batch=None,
prev_reward_batch=None):
return np.array([random.random()] * len(obs_batch))
View File
+6 -4
View File
@@ -14,7 +14,7 @@ from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.utils.annotations import override, DeveloperAPI
from ray.rllib.utils.debug import summarize
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.framework import try_import_tf, get_variable
from ray.rllib.utils.schedules import ConstantSchedule, PiecewiseSchedule
from ray.rllib.utils.tf_run_builder import TFRunBuilder
from ray.rllib.utils.types import ModelGradients, TensorType, TrainerConfigDict
@@ -816,7 +816,8 @@ class LearningRateSchedule:
@DeveloperAPI
def __init__(self, lr, lr_schedule):
self.cur_lr = tf1.get_variable("lr", initializer=lr, trainable=False)
self.cur_lr = tf1.get_variable(
"lr", initializer=lr, trainable=False)
if lr_schedule is None:
self.lr_schedule = ConstantSchedule(lr, framework=None)
else:
@@ -841,8 +842,9 @@ class EntropyCoeffSchedule:
@DeveloperAPI
def __init__(self, entropy_coeff, entropy_coeff_schedule):
self.entropy_coeff = tf1.get_variable(
"entropy_coeff", initializer=entropy_coeff, trainable=False)
self.entropy_coeff = get_variable(
entropy_coeff, framework="tf", tf_name="entropy_coeff",
trainable=False)
if entropy_coeff_schedule is None:
self.entropy_coeff_schedule = ConstantSchedule(
+12 -6
View File
@@ -39,17 +39,23 @@ parser.add_argument(
if __name__ == "__main__":
args = parser.parse_args()
# Bazel regression test mode: Get path to look for yaml files from argv[2].
# Bazel regression test mode: Get path to look for yaml files.
# Get the path or single file to use.
rllib_dir = Path(__file__).parent.parent
print("rllib dir={}".format(rllib_dir))
if not os.path.isdir(os.path.join(rllib_dir, args.yaml_dir)):
abs_yaml_path = os.path.join(rllib_dir, args.yaml_dir)
# Single file given.
if os.path.isfile(abs_yaml_path):
yaml_files = [abs_yaml_path]
# Given path/file does not exist.
elif not os.path.isdir(abs_yaml_path):
raise ValueError("yaml-dir ({}) not found!".format(args.yaml_dir))
yaml_files = rllib_dir.rglob(args.yaml_dir + "/*.yaml")
yaml_files = sorted(
map(lambda path: str(path.absolute()), yaml_files), reverse=True)
# Path given -> Get all yaml files in there via rglob.
else:
yaml_files = rllib_dir.rglob(args.yaml_dir + "/*.yaml")
yaml_files = sorted(
map(lambda path: str(path.absolute()), yaml_files), reverse=True)
print("Will run the following regression tests:")
for yaml_file in yaml_files:
+12 -14
View File
@@ -102,21 +102,19 @@ class ModelCatalogTest(unittest.TestCase):
def test_default_models(self):
ray.init(object_store_memory=1000 * 1024 * 1024)
with tf1.variable_scope("test1"):
p1 = ModelCatalog.get_model_v2(
obs_space=Box(0, 1, shape=(3,), dtype=np.float32),
action_space=Discrete(5),
num_outputs=5,
model_config={})
self.assertEqual(type(p1), FullyConnectedNetwork)
p1 = ModelCatalog.get_model_v2(
obs_space=Box(0, 1, shape=(3, ), dtype=np.float32),
action_space=Discrete(5),
num_outputs=5,
model_config={})
self.assertEqual(type(p1), FullyConnectedNetwork)
with tf1.variable_scope("test2"):
p2 = ModelCatalog.get_model_v2(
obs_space=Box(0, 1, shape=(84, 84, 3), dtype=np.float32),
action_space=Discrete(5),
num_outputs=5,
model_config={})
self.assertEqual(type(p2), VisionNetwork)
p2 = ModelCatalog.get_model_v2(
obs_space=Box(0, 1, shape=(84, 84, 3), dtype=np.float32),
action_space=Discrete(5),
num_outputs=5,
model_config={})
self.assertEqual(type(p2), VisionNetwork)
def test_custom_model(self):
ray.init(object_store_memory=1000 * 1024 * 1024)
+27 -2
View File
@@ -3,6 +3,9 @@ import unittest
import ray
from ray import tune
from ray.rllib.agents.registry import get_agent_class
from ray.rllib.utils.framework import try_import_tf
tf1, tf, tfv = try_import_tf()
def check_support(alg, config, test_eager=False, test_trace=True):
@@ -40,6 +43,22 @@ class TestEagerSupportPG(unittest.TestCase):
def tearDown(self):
ray.shutdown()
def test_simple_q(self):
check_support("SimpleQ", {"num_workers": 0, "learning_starts": 0})
def test_dqn(self):
check_support("DQN", {"num_workers": 0, "learning_starts": 0})
def test_ddpg(self):
check_support("DDPG", {"num_workers": 0})
# TODO(sven): Add these once APEX_DDPG supports eager.
# def test_apex_ddpg(self):
# check_support("APEX_DDPG", {"num_workers": 1})
def test_td3(self):
check_support("TD3", {"num_workers": 0})
def test_a2c(self):
check_support("A2C", {"num_workers": 0})
@@ -100,10 +119,16 @@ class TestEagerSupportOffPolicy(unittest.TestCase):
if __name__ == "__main__":
import pytest
import sys
# Don't test anything for version 2.x (all tests are eager anyways).
# TODO: (sven) remove entire file in the future.
if tfv == 2:
print("\tskip due to tf==2.x")
sys.exit(0)
# One can specify the specific TestCase class to run.
# None for all unittest.TestCase classes in this file.
class_ = sys.argv[1] if len(sys.argv) > 0 else None
import pytest
class_ = sys.argv[1] if len(sys.argv) > 1 else None
sys.exit(pytest.main(
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
+1 -1
View File
@@ -46,7 +46,7 @@ class TestMultiAgentPendulum(unittest.TestCase):
"framework": fw,
},
}
})
}, verbose=1)
if trials[0].last_result["episode_reward_mean"] < -300.0:
raise ValueError("Did not get to -200 reward",
trials[0].last_result)
+3 -2
View File
@@ -15,7 +15,8 @@ def check_support_multiagent(alg, config):
lambda _: MultiAgentCartPole({"num_agents": 2}))
config["log_level"] = "ERROR"
for fw in framework_iterator(config):
if fw == "tfe" and alg in ["A3C", "APEX", "APEX_DDPG", "IMPALA"]:
if fw in ["tf2", "tfe"] and \
alg in ["A3C", "APEX", "APEX_DDPG", "IMPALA"]:
continue
if alg in ["DDPG", "APEX_DDPG", "SAC"]:
a = get_agent_class(alg)(
@@ -123,6 +124,6 @@ if __name__ == "__main__":
import sys
# One can specify the specific TestCase class to run.
# None for all unittest.TestCase classes in this file.
class_ = sys.argv[1] if len(sys.argv) > 0 else None
class_ = sys.argv[1] if len(sys.argv) > 1 else None
sys.exit(pytest.main(
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
+1 -1
View File
@@ -202,6 +202,6 @@ if __name__ == "__main__":
# One can specify the specific TestCase class to run.
# None for all unittest.TestCase classes in this file.
class_ = sys.argv[1] if len(sys.argv) > 0 else None
class_ = sys.argv[1] if len(sys.argv) > 1 else None
sys.exit(pytest.main(
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
+6 -2
View File
@@ -111,9 +111,13 @@ class EpsilonGreedy(Exploration):
),
false_fn=lambda: exploit_action)
assign_op = tf1.assign(self.last_timestep, timestep)
with tf1.control_dependencies([assign_op]):
if self.framework in ["tf2", "tfe"]:
self.last_timestep = timestep
return action, tf.zeros_like(action, dtype=tf.float32)
else:
assign_op = tf1.assign(self.last_timestep, timestep)
with tf1.control_dependencies([assign_op]):
return action, tf.zeros_like(action, dtype=tf.float32)
def _get_torch_exploration_action(self, q_values, explore, timestep):
"""Torch method to produce an epsilon exploration action.
+12 -5
View File
@@ -72,7 +72,7 @@ class GaussianNoise(Exploration):
0, framework=self.framework, tf_name="timestep")
# Build the tf-info-op.
if self.framework == "tf":
if self.framework in ["tf", "tfe"]:
self._tf_info_op = self.get_info()
@override(Exploration)
@@ -123,11 +123,18 @@ class GaussianNoise(Exploration):
logp = tf.zeros(shape=(batch_size,), dtype=tf.float32)
# Increment `last_timestep` by 1 (or set to `timestep`).
assign_op = (
tf1.assign_add(self.last_timestep, 1) if timestep is None else
tf1.assign(self.last_timestep, timestep))
with tf1.control_dependencies([assign_op]):
if self.framework in ["tf2", "tfe"]:
if timestep is None:
self.last_timestep.assign_add(1)
else:
self.last_timestep.assign(timestep)
return action, logp
else:
assign_op = (
tf1.assign_add(self.last_timestep, 1) if timestep is None else
tf1.assign(self.last_timestep, timestep))
with tf1.control_dependencies([assign_op]):
return action, logp
def _get_torch_exploration_action(self, action_dist, explore, timestep):
# Set last timestep or (if not given) increase by one.
@@ -95,7 +95,11 @@ class OrnsteinUhlenbeckNoise(GaussianNoise):
shape=[self.action_space.low.size], stddev=self.stddev)
ou_new = self.ou_theta * -self.ou_state + \
self.ou_sigma * gaussian_sample
ou_state_new = tf1.assign_add(self.ou_state, ou_new)
if self.framework in ["tf2", "tfe"]:
self.ou_state.assign_add(ou_new)
ou_state_new = self.ou_state
else:
ou_state_new = tf1.assign_add(self.ou_state, ou_new)
high_m_low = self.action_space.high - self.action_space.low
high_m_low = tf.where(
tf.math.is_inf(high_m_low), tf.ones_like(high_m_low), high_m_low)
@@ -125,11 +129,18 @@ class OrnsteinUhlenbeckNoise(GaussianNoise):
logp = tf.zeros(shape=(batch_size,), dtype=tf.float32)
# Increment `last_timestep` by 1 (or set to `timestep`).
assign_op = (
tf1.assign_add(self.last_timestep, 1) if timestep is None else
tf1.assign(self.last_timestep, timestep))
with tf1.control_dependencies([assign_op, ou_state_new]):
if self.framework in ["tf2", "tfe"]:
if timestep is None:
self.last_timestep.assign_add(1)
else:
self.last_timestep = timestep
return action, logp
else:
assign_op = (
tf1.assign_add(self.last_timestep, 1) if timestep is None else
tf1.assign(self.last_timestep, timestep))
with tf1.control_dependencies([assign_op, ou_state_new]):
return action, logp
@override(GaussianNoise)
def _get_torch_exploration_action(self, action_dist, explore, timestep):
@@ -27,7 +27,7 @@ def do_test_explorations(run,
core_config["num_workers"] = 0
# Test all frameworks.
for fw in framework_iterator(core_config):
for _ in framework_iterator(core_config):
print("Agent={}".format(run))
# Test for both the default Agent's exploration AND the `Random`
@@ -189,7 +189,7 @@ class TestParameterNoise(unittest.TestCase):
def _get_current_weight(self, policy, fw):
weights = policy.get_weights()
key = 0 if fw == "tfe" else list(weights.keys())[0]
key = 0 if fw in ["tf2", "tfe"] else list(weights.keys())[0]
return weights[key][0][0]
+25 -9
View File
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
def framework_iterator(config=None,
frameworks=("tf", "tfe", "torch"),
frameworks=("tf2", "tf", "tfe", "torch"),
session=False):
"""An generator that allows for looping through n frameworks for testing.
@@ -29,18 +29,23 @@ def framework_iterator(config=None,
config (Optional[dict]): An optional config dict to alter in place
depending on the iteration.
frameworks (Tuple[str]): A list/tuple of the frameworks to be tested.
Allowed are: "tf", "tfe", "torch", and None.
Allowed are: "tf2", "tf", "tfe", "torch", and None.
session (bool): If True and only in the tf-case: Enter a tf.Session()
and yield that as second return value (otherwise yield (fw, None)).
Yields:
str: If enter_session is False:
The current framework ("tf", "tfe", "torch") used.
The current framework ("tf2", "tf", "tfe", "torch") used.
Tuple(str, Union[None,tf.Session]: If enter_session is True:
A tuple of the current fw and the tf.Session if fw="tf".
"""
config = config or {}
frameworks = [frameworks] if isinstance(frameworks, str) else frameworks
frameworks = [frameworks] if isinstance(frameworks, str) else \
list(frameworks)
# Both tf2 and tfe present -> remove "tfe" or "tf2" depending on version.
if "tf2" in frameworks and "tfe" in frameworks:
frameworks.remove("tfe" if tfv == 2 else "tf2")
for fw in frameworks:
# Skip non-installed frameworks.
@@ -53,10 +58,14 @@ def framework_iterator(config=None,
"installed)!".format(fw))
continue
elif fw == "tfe" and not eager_mode:
logger.warning("framework_iterator skipping eager (could not "
logger.warning("framework_iterator skipping tf-eager (could not "
"import `eager_mode` from tensorflow.python)!")
continue
assert fw in ["tf", "tfe", "torch", None]
elif fw == "tf2" and tfv != 2:
logger.warning(
"framework_iterator skipping tf2.x (tf version is < 2.0)!")
continue
assert fw in ["tf2", "tf", "tfe", "torch", None]
# Do we need a test session?
sess = None
@@ -69,10 +78,12 @@ def framework_iterator(config=None,
config["framework"] = fw
eager_ctx = None
if fw == "tfe":
# Enable eager mode for tf2 and tfe.
if fw in ["tf2", "tfe"]:
eager_ctx = eager_mode()
eager_ctx.__enter__()
assert tf1.executing_eagerly()
# Make sure, eager mode is off.
elif fw == "tf":
assert not tf1.executing_eagerly()
@@ -169,8 +180,13 @@ def check(x, y, decimals=5, atol=None, rtol=None, false=False):
if tf1 is not None:
# y should never be a Tensor (y=expected value).
if isinstance(y, tf1.Tensor):
raise ValueError("`y` (expected value) must not be a Tensor. "
"Use numpy.ndarray instead")
# In eager mode, numpyize tensors.
if tf.executing_eagerly():
y = y.numpy()
else:
raise ValueError(
"`y` (expected value) must not be a Tensor. "
"Use numpy.ndarray instead")
if isinstance(x, tf1.Tensor):
# In eager mode, numpyize tensors.
if tf1.executing_eagerly():
+11 -4
View File
@@ -32,11 +32,18 @@ def minimize_and_clip(optimizer, objective, var_list, clip_val=10.0):
# Accidentally passing values < 0.0 will break all gradients.
assert clip_val > 0.0, clip_val
gradients = optimizer.compute_gradients(objective, var_list=var_list)
for i, (grad, var) in enumerate(gradients):
if tf.executing_eagerly():
tape = optimizer.tape
grads_and_vars = list(zip(list(
tape.gradient(objective, var_list)), var_list))
else:
grads_and_vars = optimizer.compute_gradients(
objective, var_list=var_list)
for i, (grad, var) in enumerate(grads_and_vars):
if grad is not None:
gradients[i] = (tf.clip_by_norm(grad, clip_val), var)
return gradients
grads_and_vars[i] = (tf.clip_by_norm(grad, clip_val), var)
return grads_and_vars
def make_tf_callable(session_or_none, dynamic_shape=False):
-1
View File
@@ -1 +0,0 @@
ci/travis