mirror of
https://github.com/wassname/ray.git
synced 2026-07-04 05:01:48 +08:00
@@ -32,7 +32,7 @@ def _setup_logger():
|
||||
def _register_all():
|
||||
|
||||
for key in [
|
||||
"PPO", "ES", "DQN", "APEX", "A3C", "BC", "PG", "DDPG", "APEX_DDPG",
|
||||
"PPO", "ES", "DQN", "APEX", "A3C", "PG", "DDPG", "APEX_DDPG",
|
||||
"IMPALA", "ARS", "A2C", "__fake", "__sigmoid_fake_data",
|
||||
"__parameter_tuning"
|
||||
]:
|
||||
|
||||
@@ -5,12 +5,10 @@ from __future__ import print_function
|
||||
from ray.rllib.agents.a3c.a3c import A3CAgent, DEFAULT_CONFIG as A3C_CONFIG
|
||||
from ray.rllib.optimizers import SyncSamplesOptimizer
|
||||
from ray.rllib.utils import merge_dicts
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
A2C_DEFAULT_CONFIG = merge_dicts(
|
||||
A3C_CONFIG,
|
||||
{
|
||||
"gpu": False,
|
||||
"sample_batch_size": 20,
|
||||
"min_iter_time_s": 10,
|
||||
"sample_async": False,
|
||||
@@ -28,12 +26,3 @@ class A2CAgent(A3CAgent):
|
||||
return SyncSamplesOptimizer(self.local_evaluator,
|
||||
self.remote_evaluators,
|
||||
self.config["optimizer"])
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = merge_dicts(cls._default_config, config)
|
||||
return Resources(
|
||||
cpu=1,
|
||||
gpu=cf["gpu_fraction"] if cf["gpu"] else 0,
|
||||
extra_cpu=cf["num_workers"],
|
||||
extra_gpu=cf["use_gpu_for_workers"] and cf["num_workers"] or 0)
|
||||
|
||||
@@ -7,8 +7,6 @@ import time
|
||||
from ray.rllib.agents.a3c.a3c_tf_policy_graph import A3CPolicyGraph
|
||||
from ray.rllib.agents.agent import Agent, with_common_config
|
||||
from ray.rllib.optimizers import AsyncGradientsOptimizer
|
||||
from ray.rllib.utils import merge_dicts
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
# yapf: disable
|
||||
# __sphinx_doc_begin__
|
||||
@@ -29,8 +27,6 @@ DEFAULT_CONFIG = with_common_config({
|
||||
"vf_loss_coeff": 0.5,
|
||||
# Entropy coefficient
|
||||
"entropy_coeff": -0.01,
|
||||
# Whether to place workers on GPUs
|
||||
"use_gpu_for_workers": False,
|
||||
# Min time per iteration
|
||||
"min_iter_time_s": 5,
|
||||
# Workers sample async. Note that this increases the effective
|
||||
@@ -48,15 +44,6 @@ class A3CAgent(Agent):
|
||||
_default_config = DEFAULT_CONFIG
|
||||
_policy_graph = A3CPolicyGraph
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = merge_dicts(cls._default_config, config)
|
||||
return Resources(
|
||||
cpu=1,
|
||||
gpu=0,
|
||||
extra_cpu=cf["num_workers"],
|
||||
extra_gpu=cf["use_gpu_for_workers"] and cf["num_workers"] or 0)
|
||||
|
||||
def _init(self):
|
||||
if self.config["use_pytorch"]:
|
||||
from ray.rllib.agents.a3c.a3c_torch_policy_graph import \
|
||||
@@ -68,8 +55,7 @@ class A3CAgent(Agent):
|
||||
self.local_evaluator = self.make_local_evaluator(
|
||||
self.env_creator, policy_cls)
|
||||
self.remote_evaluators = self.make_remote_evaluators(
|
||||
self.env_creator, policy_cls, self.config["num_workers"],
|
||||
{"num_gpus": 1 if self.config["use_gpu_for_workers"] else 0})
|
||||
self.env_creator, policy_cls, self.config["num_workers"])
|
||||
self.optimizer = self._make_optimizer()
|
||||
|
||||
def _make_optimizer(self):
|
||||
|
||||
@@ -17,6 +17,7 @@ from ray.rllib.optimizers.policy_optimizer import PolicyOptimizer
|
||||
from ray.rllib.utils import FilterManager, deep_update, merge_dicts
|
||||
from ray.tune.registry import ENV_CREATOR, _global_registry
|
||||
from ray.tune.trainable import Trainable
|
||||
from ray.tune.trial import Resources
|
||||
from ray.tune.logger import UnifiedLogger
|
||||
from ray.tune.result import DEFAULT_RESULTS_DIR
|
||||
|
||||
@@ -63,11 +64,25 @@ COMMON_CONFIG = {
|
||||
# Whether to use rllib or deepmind preprocessors by default
|
||||
"preprocessor_pref": "deepmind",
|
||||
|
||||
# === Resources ===
|
||||
# Number of actors used for parallelism
|
||||
"num_workers": 2,
|
||||
# Number of GPUs to allocate to the driver. Note that not all algorithms
|
||||
# can take advantage of driver GPUs. This can be fraction (e.g., 0.3 GPUs).
|
||||
"num_gpus": 0,
|
||||
# Number of CPUs to allocate per worker.
|
||||
"num_cpus_per_worker": 1,
|
||||
# Number of GPUs to allocate per worker. This can be fractional.
|
||||
"num_gpus_per_worker": 0,
|
||||
# Any custom resources to allocate per worker.
|
||||
"custom_resources_per_worker": {},
|
||||
# Number of CPUs to allocate for the driver. Note: this only takes effect
|
||||
# when running in Tune.
|
||||
"num_cpus_for_driver": 1,
|
||||
|
||||
# === Execution ===
|
||||
# Number of environments to evaluate vectorwise per worker.
|
||||
"num_envs_per_worker": 1,
|
||||
# Number of actors used for parallelism
|
||||
"num_workers": 2,
|
||||
# Default sample batch size
|
||||
"sample_batch_size": 200,
|
||||
# Training batch size, if applicable. Should be >= sample_batch_size.
|
||||
@@ -104,8 +119,6 @@ COMMON_CONFIG = {
|
||||
},
|
||||
# Whether to LZ4 compress observations
|
||||
"compress_observations": False,
|
||||
# Allocate a fraction of a GPU instead of one (e.g., 0.3 GPUs)
|
||||
"gpu_fraction": 1,
|
||||
# Drop metric batches from unresponsive workers after this many seconds
|
||||
"collect_metrics_timeout": 180,
|
||||
|
||||
@@ -149,6 +162,17 @@ class Agent(Trainable):
|
||||
"tf_session_args", "env_config", "model", "optimizer", "multiagent"
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = dict(cls._default_config, **config)
|
||||
Agent._validate_config(cf)
|
||||
# TODO(ekl): add custom resources here once tune supports them
|
||||
return Resources(
|
||||
cpu=cf["num_cpus_for_driver"],
|
||||
gpu=cf["num_gpus"],
|
||||
extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"],
|
||||
extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
|
||||
|
||||
def make_local_evaluator(self, env_creator, policy_graph):
|
||||
"""Convenience method to return configured local evaluator."""
|
||||
|
||||
@@ -163,10 +187,15 @@ class Agent(Trainable):
|
||||
config["local_evaluator_tf_session_args"]
|
||||
}))
|
||||
|
||||
def make_remote_evaluators(self, env_creator, policy_graph, count,
|
||||
remote_args):
|
||||
def make_remote_evaluators(self, env_creator, policy_graph, count):
|
||||
"""Convenience method to return a number of remote evaluators."""
|
||||
|
||||
remote_args = {
|
||||
"num_cpus": self.config["num_cpus_per_worker"],
|
||||
"num_gpus": self.config["num_gpus_per_worker"],
|
||||
"resources": self.config["custom_resources_per_worker"],
|
||||
}
|
||||
|
||||
cls = PolicyEvaluator.as_remote(**remote_args).remote
|
||||
return [
|
||||
self._make_evaluator(cls, env_creator, policy_graph, i + 1,
|
||||
@@ -212,6 +241,21 @@ class Agent(Trainable):
|
||||
"DEFAULT_CONFIG defined by each agent for more info.\n\n"
|
||||
"The config of this agent is: {}".format(config))
|
||||
|
||||
@staticmethod
|
||||
def _validate_config(config):
|
||||
if "gpu" in config:
|
||||
raise ValueError(
|
||||
"The `gpu` config is deprecated, please use `num_gpus=0|1` "
|
||||
"instead.")
|
||||
if "gpu_fraction" in config:
|
||||
raise ValueError(
|
||||
"The `gpu_fraction` config is deprecated, please use "
|
||||
"`num_gpus=<fraction>` instead.")
|
||||
if "use_gpu_for_workers" in config:
|
||||
raise ValueError(
|
||||
"The `use_gpu_for_workers` config is deprecated, please use "
|
||||
"`num_gpus_per_worker=1` instead.")
|
||||
|
||||
def __init__(self, config=None, env=None, logger_creator=None):
|
||||
"""Initialize an RLLib agent.
|
||||
|
||||
@@ -224,6 +268,7 @@ class Agent(Trainable):
|
||||
"""
|
||||
|
||||
config = config or {}
|
||||
Agent._validate_config(config)
|
||||
|
||||
# Vars to synchronize to evaluators on each train call
|
||||
self.global_vars = {"timestep": 0}
|
||||
@@ -370,6 +415,8 @@ class Agent(Trainable):
|
||||
if hasattr(self, "remote_evaluators"):
|
||||
for ev in self.remote_evaluators:
|
||||
ev.__ray_terminate__.remote()
|
||||
if hasattr(self, "optimizer"):
|
||||
self.optimizer.stop()
|
||||
|
||||
def __getstate__(self):
|
||||
state = {}
|
||||
@@ -429,9 +476,6 @@ def get_agent_class(alg):
|
||||
elif alg == "A2C":
|
||||
from ray.rllib.agents import a3c
|
||||
return a3c.A2CAgent
|
||||
elif alg == "BC":
|
||||
from ray.rllib.agents import bc
|
||||
return bc.BCAgent
|
||||
elif alg == "PG":
|
||||
from ray.rllib.agents import pg
|
||||
return pg.PGAgent
|
||||
|
||||
@@ -13,7 +13,6 @@ import time
|
||||
|
||||
import ray
|
||||
from ray.rllib.agents import Agent, with_common_config
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
from ray.rllib.agents.ars import optimizers
|
||||
from ray.rllib.agents.ars import policies
|
||||
@@ -162,11 +161,6 @@ class ARSAgent(Agent):
|
||||
_agent_name = "ARS"
|
||||
_default_config = DEFAULT_CONFIG
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = dict(cls._default_config, **config)
|
||||
return Resources(cpu=1, gpu=0, extra_cpu=cf["num_workers"])
|
||||
|
||||
def _init(self):
|
||||
env = self.env_creator(self.config["env_config"])
|
||||
from ray.rllib import models
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
from ray.rllib.agents.bc.bc import BCAgent, DEFAULT_CONFIG
|
||||
|
||||
__all__ = ["BCAgent", "DEFAULT_CONFIG"]
|
||||
@@ -1,99 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import ray
|
||||
from ray.rllib.agents.agent import Agent
|
||||
from ray.rllib.agents.bc.bc_evaluator import BCEvaluator, \
|
||||
GPURemoteBCEvaluator, RemoteBCEvaluator
|
||||
from ray.rllib.optimizers import AsyncGradientsOptimizer
|
||||
from ray.rllib.utils import merge_dicts
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
DEFAULT_CONFIG = {
|
||||
# Number of workers (excluding master)
|
||||
"num_workers": 1,
|
||||
# Size of rollout batch
|
||||
"batch_size": 100,
|
||||
# Max global norm for each gradient calculated by worker
|
||||
"grad_clip": 40.0,
|
||||
# Learning rate
|
||||
"lr": 0.0001,
|
||||
# Whether to use a GPU for local optimization.
|
||||
"gpu": False,
|
||||
# Whether to place workers on GPUs
|
||||
"use_gpu_for_workers": False,
|
||||
# Model and preprocessor options
|
||||
"model": {
|
||||
# (Image statespace) - Converts image to Channels = 1
|
||||
"grayscale": True,
|
||||
# (Image statespace) - Each pixel
|
||||
"zero_mean": False,
|
||||
# (Image statespace) - Converts image to (dim, dim, C)
|
||||
"dim": 84,
|
||||
# (Image statespace) - Converts image shape to (C, dim, dim)
|
||||
"channel_major": False
|
||||
},
|
||||
# Arguments to pass to the rllib optimizer
|
||||
"optimizer": {
|
||||
# Number of gradients applied for each `train` step
|
||||
"grads_per_step": 100,
|
||||
},
|
||||
# Arguments to pass to the env creator
|
||||
"env_config": {},
|
||||
}
|
||||
|
||||
|
||||
class BCAgent(Agent):
|
||||
_agent_name = "BC"
|
||||
_default_config = DEFAULT_CONFIG
|
||||
_allow_unknown_configs = True
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = merge_dicts(cls._default_config, config)
|
||||
if cf["use_gpu_for_workers"]:
|
||||
num_gpus_per_worker = cf["gpu_fraction"]
|
||||
else:
|
||||
num_gpus_per_worker = 0
|
||||
return Resources(
|
||||
cpu=1,
|
||||
gpu=cf["gpu"] and cf["gpu_fraction"] or 0,
|
||||
extra_cpu=cf["num_workers"],
|
||||
extra_gpu=num_gpus_per_worker * cf["num_workers"])
|
||||
|
||||
def _init(self):
|
||||
self.local_evaluator = BCEvaluator(self.env_creator, self.config,
|
||||
self.logdir)
|
||||
if self.config["use_gpu_for_workers"]:
|
||||
remote_cls = GPURemoteBCEvaluator
|
||||
else:
|
||||
remote_cls = RemoteBCEvaluator
|
||||
self.remote_evaluators = [
|
||||
remote_cls.remote(self.env_creator, self.config, self.logdir)
|
||||
for _ in range(self.config["num_workers"])
|
||||
]
|
||||
self.optimizer = AsyncGradientsOptimizer(self.local_evaluator,
|
||||
self.remote_evaluators,
|
||||
self.config["optimizer"])
|
||||
|
||||
def _train(self):
|
||||
self.optimizer.step()
|
||||
metric_lists = [
|
||||
re.get_metrics.remote() for re in self.remote_evaluators
|
||||
]
|
||||
total_samples = 0
|
||||
total_loss = 0
|
||||
for metrics in metric_lists:
|
||||
for m in ray.get(metrics):
|
||||
total_samples += m["num_samples"]
|
||||
total_loss += m["loss"]
|
||||
result = dict(
|
||||
mean_loss=total_loss / total_samples,
|
||||
timesteps_this_iter=total_samples,
|
||||
)
|
||||
return result
|
||||
|
||||
def compute_action(self, observation):
|
||||
action, info = self.local_evaluator.policy.compute(observation)
|
||||
return action
|
||||
@@ -1,64 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import pickle
|
||||
from six.moves import queue
|
||||
|
||||
import ray
|
||||
from ray.rllib.agents.bc.experience_dataset import ExperienceDataset
|
||||
from ray.rllib.agents.bc.policy import BCPolicy
|
||||
from ray.rllib.evaluation.interface import EvaluatorInterface
|
||||
from ray.rllib.models import ModelCatalog
|
||||
|
||||
|
||||
class BCEvaluator(EvaluatorInterface):
|
||||
def __init__(self, env_creator, config, logdir):
|
||||
env = ModelCatalog.get_preprocessor_as_wrapper(
|
||||
env_creator(config["env_config"]), config["model"])
|
||||
self.dataset = ExperienceDataset(config["dataset_path"])
|
||||
self.policy = BCPolicy(env.observation_space, env.action_space, config)
|
||||
self.config = config
|
||||
self.logdir = logdir
|
||||
self.metrics_queue = queue.Queue()
|
||||
|
||||
def sample(self):
|
||||
return self.dataset.sample(self.config["batch_size"])
|
||||
|
||||
def compute_gradients(self, samples):
|
||||
gradient, info = self.policy.compute_gradients(samples)
|
||||
self.metrics_queue.put({
|
||||
"num_samples": info["num_samples"],
|
||||
"loss": info["loss"]
|
||||
})
|
||||
return gradient, {}
|
||||
|
||||
def apply_gradients(self, grads):
|
||||
self.policy.apply_gradients(grads)
|
||||
|
||||
def get_weights(self):
|
||||
return self.policy.get_weights()
|
||||
|
||||
def set_weights(self, params):
|
||||
self.policy.set_weights(params)
|
||||
|
||||
def save(self):
|
||||
weights = self.get_weights()
|
||||
return pickle.dumps({"weights": weights})
|
||||
|
||||
def restore(self, objs):
|
||||
objs = pickle.loads(objs)
|
||||
self.set_weights(objs["weights"])
|
||||
|
||||
def get_metrics(self):
|
||||
completed = []
|
||||
while True:
|
||||
try:
|
||||
completed.append(self.metrics_queue.get_nowait())
|
||||
except queue.Empty:
|
||||
break
|
||||
return completed
|
||||
|
||||
|
||||
RemoteBCEvaluator = ray.remote(BCEvaluator)
|
||||
GPURemoteBCEvaluator = ray.remote(num_gpus=1)(BCEvaluator)
|
||||
@@ -1,34 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import itertools
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class ExperienceDataset(object):
|
||||
def __init__(self, dataset_path):
|
||||
"""Create dataset of experience to imitate.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
dataset_path:
|
||||
Path of file containing the database as pickled list of trajectories,
|
||||
each trajectory being a list of steps,
|
||||
each step containing the observation and action as its first two
|
||||
elements.
|
||||
The file must be available on each machine used by a BCEvaluator.
|
||||
"""
|
||||
self._dataset = list(
|
||||
itertools.chain.from_iterable(
|
||||
pickle.load(open(dataset_path, "rb"))))
|
||||
|
||||
def sample(self, batch_size):
|
||||
indexes = np.random.choice(len(self._dataset), batch_size)
|
||||
samples = {
|
||||
'observations': [self._dataset[i][0] for i in indexes],
|
||||
'actions': [self._dataset[i][1] for i in indexes]
|
||||
}
|
||||
return samples
|
||||
@@ -1,106 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
import gym
|
||||
|
||||
import ray
|
||||
from ray.rllib.models.catalog import ModelCatalog
|
||||
|
||||
|
||||
class BCPolicy(object):
|
||||
def __init__(self, obs_space, action_space, config):
|
||||
self.local_steps = 0
|
||||
self.config = config
|
||||
self.summarize = config.get("summarize")
|
||||
self._setup_graph(obs_space, action_space)
|
||||
self.setup_loss(action_space)
|
||||
self.setup_gradients()
|
||||
self.initialize()
|
||||
|
||||
def _setup_graph(self, obs_space, ac_space):
|
||||
self.x = tf.placeholder(tf.float32, [None] + list(obs_space.shape))
|
||||
dist_class, self.logit_dim = ModelCatalog.get_action_dist(
|
||||
ac_space, self.config["model"])
|
||||
self._model = ModelCatalog.get_model(self.x, self.logit_dim,
|
||||
self.config["model"])
|
||||
self.logits = self._model.outputs
|
||||
self.curr_dist = dist_class(self.logits)
|
||||
self.sample = self.curr_dist.sample()
|
||||
self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
|
||||
tf.get_variable_scope().name)
|
||||
|
||||
def setup_loss(self, action_space):
|
||||
if isinstance(action_space, gym.spaces.Box):
|
||||
self.ac = tf.placeholder(
|
||||
tf.float32, [None] + list(action_space.shape), name="ac")
|
||||
elif isinstance(action_space, gym.spaces.Discrete):
|
||||
self.ac = tf.placeholder(tf.int64, [None], name="ac")
|
||||
else:
|
||||
raise NotImplementedError("action space" +
|
||||
str(type(action_space)) +
|
||||
"currently not supported")
|
||||
log_prob = self.curr_dist.logp(self.ac)
|
||||
self.pi_loss = -tf.reduce_sum(log_prob)
|
||||
self.loss = self.pi_loss
|
||||
|
||||
def setup_gradients(self):
|
||||
grads = tf.gradients(self.loss, self.var_list)
|
||||
self.grads, _ = tf.clip_by_global_norm(grads, self.config["grad_clip"])
|
||||
grads_and_vars = list(zip(self.grads, self.var_list))
|
||||
opt = tf.train.AdamOptimizer(self.config["lr"])
|
||||
self._apply_gradients = opt.apply_gradients(grads_and_vars)
|
||||
|
||||
def initialize(self):
|
||||
if self.summarize:
|
||||
bs = tf.to_float(tf.shape(self.x)[0])
|
||||
tf.summary.scalar("model/policy_loss", self.pi_loss / bs)
|
||||
tf.summary.scalar("model/grad_gnorm", tf.global_norm(self.grads))
|
||||
tf.summary.scalar("model/var_gnorm", tf.global_norm(self.var_list))
|
||||
self.summary_op = tf.summary.merge_all()
|
||||
|
||||
# TODO(rliaw): Can consider exposing these parameters
|
||||
self.sess = tf.Session(
|
||||
graph=self.g,
|
||||
config=tf.ConfigProto(
|
||||
intra_op_parallelism_threads=1,
|
||||
inter_op_parallelism_threads=2,
|
||||
gpu_options=tf.GPUOptions(allow_growth=True)))
|
||||
self.variables = ray.experimental.TensorFlowVariables(
|
||||
self.loss, self.sess)
|
||||
self.sess.run(tf.global_variables_initializer())
|
||||
|
||||
def compute_gradients(self, samples):
|
||||
info = {}
|
||||
feed_dict = {
|
||||
self.x: samples["observations"],
|
||||
self.ac: samples["actions"]
|
||||
}
|
||||
self.grads = [g for g in self.grads if g is not None]
|
||||
self.local_steps += 1
|
||||
if self.summarize:
|
||||
loss, grad, summ = self.sess.run(
|
||||
[self.loss, self.grads, self.summary_op], feed_dict=feed_dict)
|
||||
info["summary"] = summ
|
||||
else:
|
||||
loss, grad = self.sess.run(
|
||||
[self.loss, self.grads], feed_dict=feed_dict)
|
||||
info["num_samples"] = len(samples)
|
||||
info["loss"] = loss
|
||||
return grad, info
|
||||
|
||||
def apply_gradients(self, grads):
|
||||
feed_dict = {self.grads[i]: grads[i] for i in range(len(grads))}
|
||||
self.sess.run(self._apply_gradients, feed_dict=feed_dict)
|
||||
|
||||
def get_weights(self):
|
||||
weights = self.variables.get_weights()
|
||||
return weights
|
||||
|
||||
def set_weights(self, weights):
|
||||
self.variables.set_weights(weights)
|
||||
|
||||
def compute(self, ob, *args):
|
||||
action = self.sess.run(self.sample, {self.x: [ob]})
|
||||
return action, None
|
||||
@@ -4,7 +4,6 @@ from __future__ import print_function
|
||||
|
||||
from ray.rllib.agents.ddpg.ddpg import DDPGAgent, DEFAULT_CONFIG as DDPG_CONFIG
|
||||
from ray.rllib.utils import merge_dicts
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
APEX_DDPG_DEFAULT_CONFIG = merge_dicts(
|
||||
DDPG_CONFIG, # see also the options in ddpg.py, which are also supported
|
||||
@@ -17,7 +16,7 @@ APEX_DDPG_DEFAULT_CONFIG = merge_dicts(
|
||||
"debug": False
|
||||
}),
|
||||
"n_step": 3,
|
||||
"gpu": False,
|
||||
"num_gpus": 0,
|
||||
"num_workers": 32,
|
||||
"buffer_size": 2000000,
|
||||
"learning_starts": 50000,
|
||||
@@ -43,15 +42,6 @@ class ApexDDPGAgent(DDPGAgent):
|
||||
_agent_name = "APEX_DDPG"
|
||||
_default_config = APEX_DDPG_DEFAULT_CONFIG
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = merge_dicts(cls._default_config, config)
|
||||
return Resources(
|
||||
cpu=1,
|
||||
gpu=cf["gpu"] and cf["gpu_fraction"] or 0,
|
||||
extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"],
|
||||
extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
|
||||
|
||||
def update_target_if_needed(self):
|
||||
# Ape-X updates based on num steps trained, not sampled
|
||||
if self.optimizer.num_steps_trained - self.last_target_update_ts > \
|
||||
|
||||
@@ -90,16 +90,10 @@ DEFAULT_CONFIG = with_common_config({
|
||||
"train_batch_size": 256,
|
||||
|
||||
# === Parallelism ===
|
||||
# Whether to use a GPU for local optimization.
|
||||
"gpu": False,
|
||||
# Number of workers for collecting samples with. This only makes sense
|
||||
# to increase if your environment is particularly slow to sample, or if
|
||||
# you"re using the Async or Ape-X optimizers.
|
||||
"num_workers": 0,
|
||||
# Whether to allocate GPUs for workers (if > 0).
|
||||
"num_gpus_per_worker": 0,
|
||||
# Whether to allocate CPUs for workers (if > 0).
|
||||
"num_cpus_per_worker": 1,
|
||||
# Optimizer class to use.
|
||||
"optimizer_class": "SyncReplayOptimizer",
|
||||
# Whether to use a distribution of epsilons across workers for exploration.
|
||||
|
||||
@@ -4,7 +4,6 @@ from __future__ import print_function
|
||||
|
||||
from ray.rllib.agents.dqn.dqn import DQNAgent, DEFAULT_CONFIG as DQN_CONFIG
|
||||
from ray.rllib.utils import merge_dicts
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
# yapf: disable
|
||||
# __sphinx_doc_begin__
|
||||
@@ -19,7 +18,7 @@ APEX_DEFAULT_CONFIG = merge_dicts(
|
||||
"debug": False
|
||||
}),
|
||||
"n_step": 3,
|
||||
"gpu": True,
|
||||
"num_gpus": 1,
|
||||
"num_workers": 32,
|
||||
"buffer_size": 2000000,
|
||||
"learning_starts": 50000,
|
||||
@@ -46,15 +45,6 @@ class ApexAgent(DQNAgent):
|
||||
_agent_name = "APEX"
|
||||
_default_config = APEX_DEFAULT_CONFIG
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = merge_dicts(cls._default_config, config)
|
||||
return Resources(
|
||||
cpu=1,
|
||||
gpu=cf["gpu"] and cf["gpu_fraction"] or 0,
|
||||
extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"],
|
||||
extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
|
||||
|
||||
def update_target_if_needed(self):
|
||||
# Ape-X updates based on num steps trained, not sampled
|
||||
if self.optimizer.num_steps_trained - self.last_target_update_ts > \
|
||||
|
||||
@@ -8,9 +8,7 @@ from ray.rllib import optimizers
|
||||
from ray.rllib.agents.agent import Agent, with_common_config
|
||||
from ray.rllib.agents.dqn.dqn_policy_graph import DQNPolicyGraph
|
||||
from ray.rllib.evaluation.metrics import collect_metrics
|
||||
from ray.rllib.utils import merge_dicts
|
||||
from ray.rllib.utils.schedules import ConstantSchedule, LinearSchedule
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
OPTIMIZER_SHARED_CONFIGS = [
|
||||
"buffer_size", "prioritized_replay", "prioritized_replay_alpha",
|
||||
@@ -42,8 +40,6 @@ DEFAULT_CONFIG = with_common_config({
|
||||
"hiddens": [256],
|
||||
# N-step Q learning
|
||||
"n_step": 1,
|
||||
# Whether to use rllib or deepmind preprocessors
|
||||
"preprocessor_pref": "deepmind",
|
||||
|
||||
# === Exploration ===
|
||||
# Max num timesteps for annealing schedules. Exploration is annealed from
|
||||
@@ -98,16 +94,10 @@ DEFAULT_CONFIG = with_common_config({
|
||||
"train_batch_size": 32,
|
||||
|
||||
# === Parallelism ===
|
||||
# Whether to use a GPU for local optimization.
|
||||
"gpu": False,
|
||||
# Number of workers for collecting samples with. This only makes sense
|
||||
# to increase if your environment is particularly slow to sample, or if
|
||||
# you"re using the Async or Ape-X optimizers.
|
||||
"num_workers": 0,
|
||||
# Whether to allocate GPUs for workers (if > 0).
|
||||
"num_gpus_per_worker": 0,
|
||||
# Whether to allocate CPUs for workers (if > 0).
|
||||
"num_cpus_per_worker": 1,
|
||||
# Optimizer class to use.
|
||||
"optimizer_class": "SyncReplayOptimizer",
|
||||
# Whether to use a distribution of epsilons across workers for exploration.
|
||||
@@ -128,15 +118,6 @@ class DQNAgent(Agent):
|
||||
_default_config = DEFAULT_CONFIG
|
||||
_policy_graph = DQNPolicyGraph
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = merge_dicts(cls._default_config, config)
|
||||
return Resources(
|
||||
cpu=1,
|
||||
gpu=cf["gpu"] and cf["gpu_fraction"] or 0,
|
||||
extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"],
|
||||
extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
|
||||
|
||||
def _init(self):
|
||||
# Update effective batch size to include n-step
|
||||
adjusted_batch_size = max(self.config["sample_batch_size"],
|
||||
@@ -163,12 +144,9 @@ class DQNAgent(Agent):
|
||||
self.env_creator, self._policy_graph)
|
||||
|
||||
def create_remote_evaluators():
|
||||
return self.make_remote_evaluators(
|
||||
self.env_creator, self._policy_graph,
|
||||
self.config["num_workers"], {
|
||||
"num_cpus": self.config["num_cpus_per_worker"],
|
||||
"num_gpus": self.config["num_gpus_per_worker"]
|
||||
})
|
||||
return self.make_remote_evaluators(self.env_creator,
|
||||
self._policy_graph,
|
||||
self.config["num_workers"])
|
||||
|
||||
if self.config["optimizer_class"] != "AsyncReplayOptimizer":
|
||||
self.remote_evaluators = create_remote_evaluators()
|
||||
|
||||
@@ -12,12 +12,10 @@ import time
|
||||
|
||||
import ray
|
||||
from ray.rllib.agents import Agent, with_common_config
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
from ray.rllib.agents.es import optimizers
|
||||
from ray.rllib.agents.es import policies
|
||||
from ray.rllib.agents.es import utils
|
||||
from ray.rllib.utils import merge_dicts
|
||||
from ray.rllib.utils import FilterManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -169,11 +167,6 @@ class ESAgent(Agent):
|
||||
_agent_name = "ES"
|
||||
_default_config = DEFAULT_CONFIG
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = merge_dicts(cls._default_config, config)
|
||||
return Resources(cpu=1, gpu=0, extra_cpu=cf["num_workers"])
|
||||
|
||||
def _init(self):
|
||||
policy_params = {"action_noise_std": 0.01}
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ from ray.rllib.agents.a3c.a3c_tf_policy_graph import A3CPolicyGraph
|
||||
from ray.rllib.agents.impala.vtrace_policy_graph import VTracePolicyGraph
|
||||
from ray.rllib.agents.agent import Agent, with_common_config
|
||||
from ray.rllib.optimizers import AsyncSamplesOptimizer
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
OPTIMIZER_SHARED_CONFIGS = [
|
||||
"lr",
|
||||
@@ -36,8 +35,6 @@ DEFAULT_CONFIG = with_common_config({
|
||||
"train_batch_size": 500,
|
||||
"min_iter_time_s": 10,
|
||||
"num_workers": 2,
|
||||
"num_cpus_per_worker": 1,
|
||||
"num_gpus_per_worker": 0,
|
||||
# number of GPUs the learner should use.
|
||||
"num_gpus": 1,
|
||||
# set >1 to load data into GPUs in parallel. Increases GPU memory usage
|
||||
@@ -77,15 +74,6 @@ class ImpalaAgent(Agent):
|
||||
_default_config = DEFAULT_CONFIG
|
||||
_policy_graph = VTracePolicyGraph
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = dict(cls._default_config, **config)
|
||||
return Resources(
|
||||
cpu=1,
|
||||
gpu=cf["num_gpus"] and cf["num_gpus"] * cf["gpu_fraction"] or 0,
|
||||
extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"],
|
||||
extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
|
||||
|
||||
def _init(self):
|
||||
for k in OPTIMIZER_SHARED_CONFIGS:
|
||||
if k not in self.config["optimizer"]:
|
||||
@@ -97,8 +85,7 @@ class ImpalaAgent(Agent):
|
||||
self.local_evaluator = self.make_local_evaluator(
|
||||
self.env_creator, policy_cls)
|
||||
self.remote_evaluators = self.make_remote_evaluators(
|
||||
self.env_creator, policy_cls, self.config["num_workers"],
|
||||
{"num_cpus": 1})
|
||||
self.env_creator, policy_cls, self.config["num_workers"])
|
||||
self.optimizer = AsyncSamplesOptimizer(self.local_evaluator,
|
||||
self.remote_evaluators,
|
||||
self.config["optimizer"])
|
||||
|
||||
@@ -6,18 +6,19 @@ import os
|
||||
import pickle
|
||||
import numpy as np
|
||||
|
||||
from ray.rllib.agents.agent import Agent
|
||||
from ray.rllib.agents.agent import Agent, with_common_config
|
||||
|
||||
|
||||
class _MockAgent(Agent):
|
||||
"""Mock agent for use in tests"""
|
||||
|
||||
_agent_name = "MockAgent"
|
||||
_default_config = {
|
||||
_default_config = with_common_config({
|
||||
"mock_error": False,
|
||||
"persistent_error": False,
|
||||
"test_variable": 1
|
||||
}
|
||||
"test_variable": 1,
|
||||
"num_workers": 0,
|
||||
})
|
||||
|
||||
def _init(self):
|
||||
self.info = None
|
||||
@@ -59,13 +60,14 @@ class _SigmoidFakeData(_MockAgent):
|
||||
This can be helpful for evaluating early stopping algorithms."""
|
||||
|
||||
_agent_name = "SigmoidFakeData"
|
||||
_default_config = {
|
||||
_default_config = with_common_config({
|
||||
"width": 100,
|
||||
"height": 100,
|
||||
"offset": 0,
|
||||
"iter_time": 10,
|
||||
"iter_timesteps": 1,
|
||||
}
|
||||
"num_workers": 0,
|
||||
})
|
||||
|
||||
def _train(self):
|
||||
i = max(0, self.iteration - self.config["offset"])
|
||||
@@ -82,13 +84,14 @@ class _SigmoidFakeData(_MockAgent):
|
||||
class _ParameterTuningAgent(_MockAgent):
|
||||
|
||||
_agent_name = "ParameterTuningAgent"
|
||||
_default_config = {
|
||||
_default_config = with_common_config({
|
||||
"reward_amt": 10,
|
||||
"dummy_param": 10,
|
||||
"dummy_param2": 15,
|
||||
"iter_time": 10,
|
||||
"iter_timesteps": 1
|
||||
}
|
||||
"iter_timesteps": 1,
|
||||
"num_workers": 0,
|
||||
})
|
||||
|
||||
def _train(self):
|
||||
return dict(
|
||||
|
||||
@@ -5,8 +5,6 @@ from __future__ import print_function
|
||||
from ray.rllib.agents.agent import Agent, with_common_config
|
||||
from ray.rllib.agents.pg.pg_policy_graph import PGPolicyGraph
|
||||
from ray.rllib.optimizers import SyncSamplesOptimizer
|
||||
from ray.rllib.utils import merge_dicts
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
# yapf: disable
|
||||
# __sphinx_doc_begin__
|
||||
@@ -31,17 +29,11 @@ class PGAgent(Agent):
|
||||
_default_config = DEFAULT_CONFIG
|
||||
_policy_graph = PGPolicyGraph
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = merge_dicts(cls._default_config, config)
|
||||
return Resources(cpu=1, gpu=0, extra_cpu=cf["num_workers"])
|
||||
|
||||
def _init(self):
|
||||
self.local_evaluator = self.make_local_evaluator(
|
||||
self.env_creator, self._policy_graph)
|
||||
self.remote_evaluators = self.make_remote_evaluators(
|
||||
self.env_creator, self._policy_graph, self.config["num_workers"],
|
||||
{})
|
||||
self.env_creator, self._policy_graph, self.config["num_workers"])
|
||||
self.optimizer = SyncSamplesOptimizer(self.local_evaluator,
|
||||
self.remote_evaluators,
|
||||
self.config["optimizer"])
|
||||
|
||||
@@ -6,9 +6,7 @@ import logging
|
||||
|
||||
from ray.rllib.agents import Agent, with_common_config
|
||||
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
|
||||
from ray.rllib.utils import merge_dicts
|
||||
from ray.rllib.optimizers import SyncSamplesOptimizer, LocalMultiGPUOptimizer
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -47,12 +45,6 @@ DEFAULT_CONFIG = with_common_config({
|
||||
"vf_clip_param": 10.0,
|
||||
# Target value for KL divergence
|
||||
"kl_target": 0.01,
|
||||
# Number of GPUs to use for SGD
|
||||
"num_gpus": 0,
|
||||
# Whether to allocate GPUs for workers (if > 0).
|
||||
"num_gpus_per_worker": 0,
|
||||
# Whether to allocate CPUs for workers (if > 0).
|
||||
"num_cpus_per_worker": 1,
|
||||
# Whether to rollout "complete_episodes" or "truncate_episodes"
|
||||
"batch_mode": "truncate_episodes",
|
||||
# Which observation filter to apply to the observation
|
||||
@@ -71,24 +63,12 @@ class PPOAgent(Agent):
|
||||
_default_config = DEFAULT_CONFIG
|
||||
_policy_graph = PPOPolicyGraph
|
||||
|
||||
@classmethod
|
||||
def default_resource_request(cls, config):
|
||||
cf = merge_dicts(cls._default_config, config)
|
||||
return Resources(
|
||||
cpu=1,
|
||||
gpu=cf["num_gpus"],
|
||||
extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"],
|
||||
extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
|
||||
|
||||
def _init(self):
|
||||
self._validate_config()
|
||||
self.local_evaluator = self.make_local_evaluator(
|
||||
self.env_creator, self._policy_graph)
|
||||
self.remote_evaluators = self.make_remote_evaluators(
|
||||
self.env_creator, self._policy_graph, self.config["num_workers"], {
|
||||
"num_cpus": self.config["num_cpus_per_worker"],
|
||||
"num_gpus": self.config["num_gpus_per_worker"]
|
||||
})
|
||||
self.env_creator, self._policy_graph, self.config["num_workers"])
|
||||
if self.config["simple_optimizer"]:
|
||||
self.optimizer = SyncSamplesOptimizer(
|
||||
self.local_evaluator, self.remote_evaluators, {
|
||||
|
||||
@@ -81,8 +81,9 @@ class PolicyEvaluator(EvaluatorInterface):
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def as_remote(cls, num_cpus=None, num_gpus=None):
|
||||
return ray.remote(num_cpus=num_cpus, num_gpus=num_gpus)(cls)
|
||||
def as_remote(cls, num_cpus=None, num_gpus=None, resources=None):
|
||||
return ray.remote(
|
||||
num_cpus=num_cpus, num_gpus=num_gpus, resources=resources)(cls)
|
||||
|
||||
def __init__(self,
|
||||
env_creator,
|
||||
|
||||
@@ -47,6 +47,8 @@ class CustomModel1(Model):
|
||||
auxiliary_name_scope=False):
|
||||
last_layer = slim.fully_connected(
|
||||
input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1")
|
||||
last_layer = slim.fully_connected(
|
||||
last_layer, 64, activation_fn=tf.nn.relu, scope="fc2")
|
||||
output = slim.fully_connected(
|
||||
last_layer, num_outputs, activation_fn=None, scope="fc_out")
|
||||
return output, last_layer
|
||||
@@ -61,6 +63,8 @@ class CustomModel2(Model):
|
||||
auxiliary_name_scope=False):
|
||||
last_layer = slim.fully_connected(
|
||||
input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1")
|
||||
last_layer = slim.fully_connected(
|
||||
last_layer, 64, activation_fn=tf.nn.relu, scope="fc2")
|
||||
output = slim.fully_connected(
|
||||
last_layer, num_outputs, activation_fn=None, scope="fc_out")
|
||||
return output, last_layer
|
||||
@@ -84,7 +88,7 @@ if __name__ == "__main__":
|
||||
"model": {
|
||||
"custom_model": ["model1", "model2"][i % 2],
|
||||
},
|
||||
"gamma": random.choice([0.5, 0.8, 0.9, 0.95, 0.99]),
|
||||
"gamma": random.choice([0.95, 0.99]),
|
||||
}
|
||||
return (PPOPolicyGraph, obs_space, act_space, config)
|
||||
|
||||
|
||||
@@ -134,9 +134,10 @@ class LearnerThread(threading.Thread):
|
||||
self.grad_timer = TimerStat()
|
||||
self.daemon = True
|
||||
self.weights_updated = False
|
||||
self.stopped = False
|
||||
|
||||
def run(self):
|
||||
while True:
|
||||
while not self.stopped:
|
||||
self.step()
|
||||
|
||||
def step(self):
|
||||
@@ -299,6 +300,11 @@ class AsyncReplayOptimizer(PolicyOptimizer):
|
||||
|
||||
return sample_timesteps, train_timesteps
|
||||
|
||||
def stop(self):
|
||||
for r in self.replay_actors:
|
||||
r.__ray_terminate__.remote()
|
||||
self.learner.stopped = True
|
||||
|
||||
def stats(self):
|
||||
replay_stats = ray.get(self.replay_actors[0].stats.remote(self.debug))
|
||||
timing = {
|
||||
|
||||
@@ -49,9 +49,10 @@ class LearnerThread(threading.Thread):
|
||||
self.daemon = True
|
||||
self.weights_updated = False
|
||||
self.stats = {}
|
||||
self.stopped = False
|
||||
|
||||
def run(self):
|
||||
while True:
|
||||
while not self.stopped:
|
||||
self.step()
|
||||
|
||||
def step(self):
|
||||
@@ -329,6 +330,9 @@ class AsyncSamplesOptimizer(PolicyOptimizer):
|
||||
|
||||
return sample_timesteps, train_timesteps
|
||||
|
||||
def stop(self):
|
||||
self.learner.stopped = True
|
||||
|
||||
def stats(self):
|
||||
timing = {
|
||||
"{}_time_ms".format(k): round(1000 * self.timers[k].mean, 3)
|
||||
|
||||
@@ -143,6 +143,10 @@ class PolicyOptimizer(object):
|
||||
])
|
||||
return local_result + remote_results
|
||||
|
||||
def stop(self):
|
||||
"""Release any resources used by this optimizer."""
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def _check_not_multiagent(sample_batch):
|
||||
if isinstance(sample_batch, MultiAgentBatch):
|
||||
|
||||
@@ -13,7 +13,7 @@ atari-a2c:
|
||||
clip_rewards: True
|
||||
num_workers: 5
|
||||
num_envs_per_worker: 5
|
||||
gpu: true
|
||||
num_gpus: 1
|
||||
lr_schedule: [
|
||||
[0, 0.0007],
|
||||
[20000000, 0.000000000001],
|
||||
|
||||
@@ -23,7 +23,7 @@ apex:
|
||||
prioritized_replay_alpha: 0.5
|
||||
beta_annealing_fraction: 1.0
|
||||
final_prioritized_replay_beta: 1.0
|
||||
gpu: true
|
||||
num_gpus: 1
|
||||
|
||||
# APEX
|
||||
num_workers: 8
|
||||
|
||||
@@ -27,5 +27,5 @@ basic-dqn:
|
||||
prioritized_replay_alpha: 0.5
|
||||
beta_annealing_fraction: 1.0
|
||||
final_prioritized_replay_beta: 1.0
|
||||
gpu: true
|
||||
num_gpus: 1
|
||||
timesteps_per_iteration: 10000
|
||||
|
||||
@@ -29,5 +29,5 @@ atari-basic-dqn:
|
||||
prioritized_replay_alpha: 0.5
|
||||
beta_annealing_fraction: 1.0
|
||||
final_prioritized_replay_beta: 1.0
|
||||
gpu: true
|
||||
num_gpus: 1
|
||||
timesteps_per_iteration: 10000
|
||||
|
||||
@@ -27,5 +27,5 @@ dueling-ddqn:
|
||||
prioritized_replay_alpha: 0.5
|
||||
beta_annealing_fraction: 1.0
|
||||
final_prioritized_replay_beta: 1.0
|
||||
gpu: true
|
||||
num_gpus: 1
|
||||
timesteps_per_iteration: 10000
|
||||
|
||||
@@ -6,7 +6,7 @@ pong-deterministic-dqn:
|
||||
episode_reward_mean: 20
|
||||
time_total_s: 7200
|
||||
config:
|
||||
gpu: True
|
||||
num_gpus: 1
|
||||
gamma: 0.99
|
||||
lr: .0001
|
||||
learning_starts: 10000
|
||||
|
||||
@@ -29,6 +29,8 @@ class RayTrialExecutor(TrialExecutor):
|
||||
self._avail_resources = Resources(cpu=0, gpu=0)
|
||||
self._committed_resources = Resources(cpu=0, gpu=0)
|
||||
self._resources_initialized = False
|
||||
if ray.is_initialized():
|
||||
self._update_avail_resources()
|
||||
|
||||
def _setup_runner(self, trial):
|
||||
cls = ray.remote(
|
||||
@@ -257,7 +259,16 @@ class RayTrialExecutor(TrialExecutor):
|
||||
self._committed_resources.cpu, self._avail_resources.cpu,
|
||||
self._committed_resources.gpu, self._avail_resources.gpu)
|
||||
else:
|
||||
return ""
|
||||
return "Resources requested: ?"
|
||||
|
||||
def resource_string(self):
|
||||
"""Returns a string describing the total resources available."""
|
||||
|
||||
if self._resources_initialized:
|
||||
return "{} CPUs, {} GPUs".format(self._avail_resources.cpu,
|
||||
self._avail_resources.gpu)
|
||||
else:
|
||||
return "? CPUs, ? GPUs"
|
||||
|
||||
def on_step_begin(self):
|
||||
"""Before step() called, update the available resources."""
|
||||
|
||||
@@ -158,7 +158,11 @@ class TrialExecutor(object):
|
||||
|
||||
def debug_string(self):
|
||||
"""Returns a human readable message for printing to the console."""
|
||||
pass
|
||||
raise NotImplementedError
|
||||
|
||||
def resource_string(self):
|
||||
"""Returns a string describing the total resources available."""
|
||||
raise NotImplementedError
|
||||
|
||||
def restore(self, trial, checkpoint=None):
|
||||
"""Restores training state from a checkpoint.
|
||||
|
||||
@@ -122,13 +122,13 @@ class TrialRunner(object):
|
||||
if not self.has_resources(trial.resources):
|
||||
raise TuneError(
|
||||
("Insufficient cluster resources to launch trial: "
|
||||
"trial requested {} but the cluster summary: {} "
|
||||
"trial requested {} but the cluster has only {}. "
|
||||
"Pass `queue_trials=True` in "
|
||||
"ray.tune.run_experiments() or on the command "
|
||||
"line to queue trials until the cluster scales "
|
||||
"up. {}").format(
|
||||
trial.resources.summary_string(),
|
||||
self.trial_executor.debug_string(),
|
||||
self.trial_executor.resource_string(),
|
||||
trial._get_trainable_cls().resource_help(
|
||||
trial.config)))
|
||||
elif trial.status == Trial.PAUSED:
|
||||
|
||||
Reference in New Issue
Block a user