[rllib] Add the ability to run arbitrary Python scripts with ray.tune (#1132)

* fix yaml bug

* add ext agent

* gpus

* update

* tuning

* docs

* Sun Oct 15 21:09:25 PDT 2017

* lint

* update

* Sun Oct 15 22:39:55 PDT 2017

* Sun Oct 15 22:40:17 PDT 2017

* Sun Oct 15 22:43:06 PDT 2017

* Sun Oct 15 22:46:06 PDT 2017

* Sun Oct 15 22:46:21 PDT 2017

* Sun Oct 15 22:48:11 PDT 2017

* Sun Oct 15 22:48:44 PDT 2017

* Sun Oct 15 22:49:23 PDT 2017

* Sun Oct 15 22:50:21 PDT 2017

* Sun Oct 15 22:53:00 PDT 2017

* Sun Oct 15 22:53:34 PDT 2017

* Sun Oct 15 22:54:33 PDT 2017

* Sun Oct 15 22:54:50 PDT 2017

* Sun Oct 15 22:55:20 PDT 2017

* Sun Oct 15 22:56:56 PDT 2017

* Sun Oct 15 22:59:03 PDT 2017

* fix

* Update tune_mnist_ray.py

* remove script trial

* fix

* reorder

* fix ex

* py2 support

* upd

* comments

* comments

* cleanup readme

* fix trial

* annotate

* Update rllib.rst
This commit is contained in:
Eric Liang
2017-10-18 11:49:28 -07:00
committed by Richard Liaw
parent 4157bcb80b
commit 5a50e0e1d7
24 changed files with 745 additions and 166 deletions
+2 -1
View File
@@ -9,11 +9,12 @@ import six.moves.queue as queue
import os
import ray
from ray.rllib.agent import Agent
from ray.rllib.a3c.runner import RunnerThread, process_rollout
from ray.rllib.a3c.envs import create_and_wrap
from ray.rllib.common import Agent, TrainingResult
from ray.rllib.a3c.shared_model import SharedModel
from ray.rllib.a3c.shared_model_lstm import SharedModelLSTM
from ray.tune.result import TrainingResult
DEFAULT_CONFIG = {
@@ -2,7 +2,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import namedtuple
from datetime import datetime
import json
@@ -16,6 +15,7 @@ import time
import uuid
import tensorflow as tf
from ray.tune.result import TrainingResult
if sys.version_info[0] == 2:
import cStringIO as StringIO
@@ -26,39 +26,6 @@ logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
TrainingResult = namedtuple("TrainingResult", [
# Unique string identifier for this experiment. This id is preserved
# across checkpoint / restore calls.
"experiment_id",
# The index of this training iteration, e.g. call to train().
"training_iteration",
# The mean episode reward reported during this iteration.
"episode_reward_mean",
# The mean episode length reported during this iteration.
"episode_len_mean",
# Agent-specific metadata to report for this iteration.
"info",
# Number of timesteps in the simulator in this iteration.
"timesteps_this_iter",
# Accumulated timesteps for this entire experiment.
"timesteps_total",
# Time in seconds this iteration took to run.
"time_this_iter_s",
# Accumulated time in seconds for this entire experiment.
"time_total_s",
])
TrainingResult.__new__.__defaults__ = (None,) * len(TrainingResult._fields)
class Agent(object):
"""All RLlib agents extend this base class.
@@ -71,6 +38,8 @@ class Agent(object):
logdir (str): Directory in which training outputs should be placed.
"""
_allow_unknown_configs = False
def __init__(
self, env_creator, config, local_dir='/tmp/ray',
upload_dir=None, agent_id=None):
@@ -97,11 +66,12 @@ class Agent(object):
self.env_creator = env_creator
self.config = self._default_config.copy()
for k in config.keys():
if k not in self.config:
raise Exception(
"Unknown agent config `{}`, "
"all agent configs: {}".format(k, self.config.keys()))
if not self._allow_unknown_configs:
for k in config.keys():
if k not in self.config:
raise Exception(
"Unknown agent config `{}`, "
"all agent configs: {}".format(k, self.config.keys()))
self.config.update(config)
self.config.update({
"agent_id": agent_id,
@@ -112,7 +82,7 @@ class Agent(object):
logdir_suffix = "{}_{}_{}".format(
env_name,
self.__class__.__name__,
self._agent_name,
agent_id or datetime.today().strftime("%Y-%m-%d_%H-%M-%S"))
if not os.path.exists(local_dir):
@@ -128,12 +98,12 @@ class Agent(object):
# TODO(ekl) consider inlining config into the result jsons
config_out = os.path.join(self.logdir, "config.json")
with open(config_out, "w") as f:
json.dump(self.config, f, sort_keys=True, cls=RLLibEncoder)
json.dump(self.config, f, sort_keys=True, cls=_Encoder)
logger.info(
"%s algorithm created with logdir '%s' and upload uri '%s'",
"%s agent created with logdir '%s' and upload uri '%s'",
self.__class__.__name__, self.logdir, log_upload_uri)
self._result_logger = RLLibLogger(
self._result_logger = _Logger(
os.path.join(self.logdir, "result.json"),
log_upload_uri and os.path.join(log_upload_uri, "result.json"))
self._file_writer = tf.summary.FileWriter(self.logdir)
@@ -162,6 +132,8 @@ class Agent(object):
self._iteration += 1
time_this_iter = time.time() - start
assert result.timesteps_this_iter is not None
self._time_total += time_this_iter
self._timesteps_total += result.timesteps_this_iter
@@ -170,10 +142,9 @@ class Agent(object):
training_iteration=self._iteration,
timesteps_total=self._timesteps_total,
time_this_iter_s=time_this_iter,
time_total_s=self._time_total)
for field in result:
assert field is not None, result
time_total_s=self._time_total,
pid=os.getpid(),
hostname=os.uname()[1])
self._log_result(result)
@@ -184,18 +155,18 @@ class Agent(object):
# We need to use a custom json serializer class so that NaNs get
# encoded as null as required by Athena.
json.dump(result._asdict(), self._result_logger, cls=RLLibEncoder)
json.dump(result._asdict(), self._result_logger, cls=_Encoder)
self._result_logger.write("\n")
train_stats = tf.Summary(value=[
tf.Summary.Value(
tag="rllib/time_this_iter_s",
simple_value=result.time_this_iter_s),
tf.Summary.Value(
tag="rllib/episode_reward_mean",
simple_value=result.episode_reward_mean),
tf.Summary.Value(
tag="rllib/episode_len_mean",
simple_value=result.episode_len_mean)])
attrs_to_log = [
"time_this_iter_s", "mean_loss", "mean_accuracy",
"episode_reward_mean", "episode_len_mean"]
values = []
for attr in attrs_to_log:
if getattr(result, attr) is not None:
values.append(tf.Summary.Value(
tag="ray/tune/{}".format(attr),
simple_value=getattr(result, attr)))
train_stats = tf.Summary(value=values)
self._file_writer.add_summary(train_stats, result.training_iteration)
def save(self):
@@ -269,10 +240,10 @@ class Agent(object):
raise NotImplementedError
class RLLibEncoder(json.JSONEncoder):
class _Encoder(json.JSONEncoder):
def __init__(self, nan_str="null", **kwargs):
super(RLLibEncoder, self).__init__(**kwargs)
super(_Encoder, self).__init__(**kwargs)
self.nan_str = nan_str
def iterencode(self, o, _one_shot=False):
@@ -299,7 +270,7 @@ class RLLibEncoder(json.JSONEncoder):
return int(value)
class RLLibLogger(object):
class _Logger(object):
"""Writing small amounts of data to S3 with real-time updates.
"""
@@ -322,3 +293,44 @@ class RLLibLogger(object):
with self.smart_open(self.uri, "w") as f:
self.result_buffer.write(b)
f.write(self.result_buffer.getvalue())
class _MockAgent(Agent):
    """Trivial agent that reports constant metrics; intended for tests."""

    _agent_name = "MockAgent"
    _default_config = {}

    def _init(self):
        """Nothing to set up for the mock."""

    def _train(self):
        """Return the same fixed TrainingResult on every call."""
        fixed_metrics = dict(
            episode_reward_mean=10,
            episode_len_mean=10,
            timesteps_this_iter=10,
            info={})
        return TrainingResult(**fixed_metrics)
def get_agent_class(alg):
    """Returns the class of a known agent given its name.

    The agent modules are imported lazily, inside the matching branch, so
    that only the requested algorithm's module is loaded.

    Args:
        alg (str): Name of the algorithm. One of "PPO", "ES", "DQN", "A3C",
            "script" (runs a user-supplied Python script) or "__fake" (the
            mock agent used in tests).

    Returns:
        The agent class registered under `alg`.

    Raises:
        Exception: If `alg` is not one of the known names.
    """

    if alg == "PPO":
        from ray.rllib import ppo
        return ppo.PPOAgent
    elif alg == "ES":
        from ray.rllib import es
        return es.ESAgent
    elif alg == "DQN":
        from ray.rllib import dqn
        return dqn.DQNAgent
    elif alg == "A3C":
        from ray.rllib import a3c
        return a3c.A3CAgent
    elif alg == "script":
        from ray.tune import script_runner
        return script_runner.ScriptRunner
    elif alg == "__fake":
        return _MockAgent
    else:
        # Keep this list in sync with the public branches above ("__fake" is
        # intentionally not advertised).
        raise Exception(
            ("Unknown algorithm {}, check --alg argument. Valid choices " +
             "are PPO, ES, DQN, A3C, and script.").format(alg))
-43
View File
@@ -1,43 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ray.rllib.common import Agent, TrainingResult
class _MockAgent(Agent):
"""Mock agent for use in tests"""
_agent_name = "MockAgent"
_default_config = {}
def _init(self):
pass
def _train(self):
return TrainingResult(
episode_reward_mean=10, episode_len_mean=10,
timesteps_this_iter=10, info={})
def get_agent_class(alg):
"""Returns the class of an known agent given its name."""
if alg == "PPO":
from ray.rllib import ppo
return ppo.PPOAgent
elif alg == "ES":
from ray.rllib import es
return es.ESAgent
elif alg == "DQN":
from ray.rllib import dqn
return dqn.DQNAgent
elif alg == "A3C":
from ray.rllib import a3c
return a3c.A3CAgent
elif alg == "__fake":
return _MockAgent
else:
raise Exception(
("Unknown algorithm {}, check --alg argument. Valid choices " +
"are PPO, ES, DQN, and A3C.").format(alg))
+2 -1
View File
@@ -10,11 +10,12 @@ import os
import tensorflow as tf
import ray
from ray.rllib.common import Agent, TrainingResult
from ray.rllib.agent import Agent
from ray.rllib.dqn import logger, models
from ray.rllib.dqn.common.wrappers import wrap_dqn
from ray.rllib.dqn.common.schedules import LinearSchedule
from ray.rllib.dqn.replay_buffer import ReplayBuffer, PrioritizedReplayBuffer
from ray.tune.result import TrainingResult
"""The default configuration dict for the DQN algorithm.
+2 -1
View File
@@ -12,7 +12,7 @@ import pickle
import time
import ray
from ray.rllib.common import Agent, TrainingResult
from ray.rllib.agent import Agent
from ray.rllib.models import ModelCatalog
from ray.rllib.es import optimizers
@@ -20,6 +20,7 @@ from ray.rllib.es import policies
from ray.rllib.es import tabular_logger as tlogger
from ray.rllib.es import tf_util
from ray.rllib.es import utils
from ray.tune.result import TrainingResult
Result = namedtuple("Result", [
+2 -1
View File
@@ -11,7 +11,8 @@ import tensorflow as tf
from tensorflow.python import debug as tf_debug
import ray
from ray.rllib.common import Agent, TrainingResult
from ray.rllib.agent import Agent
from ray.tune.result import TrainingResult
from ray.rllib.ppo.runner import Runner, RemoteRunner
from ray.rllib.ppo.rollout import collect_samples
from ray.rllib.ppo.utils import shuffle
@@ -8,7 +8,7 @@ import numpy as np
import ray
import random
from ray.rllib.agents import get_agent_class
from ray.rllib.agent import get_agent_class
def get_mean_action(alg, obs):
+16 -3
View File
@@ -34,14 +34,18 @@ parser = make_parser("Train a reinforcement learning agent.")
# defined there.
parser.add_argument("--redis-address", default=None, type=str,
help="The Redis address of the cluster.")
parser.add_argument("--num-cpus", default=None, type=int,
help="Number of CPUs to allocate to Ray.")
parser.add_argument("--num-gpus", default=None, type=int,
help="Number of GPUs to allocate to Ray.")
parser.add_argument("--restore", default=None, type=str,
help="If specified, restore from this checkpoint.")
parser.add_argument("-f", "--config-file", default=None, type=str,
help="If specified, use config options from this file.")
if __name__ == "__main__":
args = parser.parse_args()
def main(argv):
args = parser.parse_args(argv)
runner = TrialRunner()
if args.config_file:
@@ -56,7 +60,9 @@ if __name__ == "__main__":
args.resources, args.stop, args.checkpoint_freq,
args.restore, args.upload_dir))
ray.init(redis_address=args.redis_address)
ray.init(
redis_address=args.redis_address, num_cpus=args.num_cpus,
num_gpus=args.num_gpus)
while not runner.is_finished():
runner.step()
@@ -64,4 +70,11 @@ if __name__ == "__main__":
for trial in runner.get_trials():
if trial.status != Trial.TERMINATED:
print("Exit 1")
sys.exit(1)
print("Exit 0")
if __name__ == "__main__":
main(sys.argv[1:])
@@ -5,4 +5,4 @@ hopper-ppo:
resources:
cpu: 64
gpu: 4
config: {"gamma": 0.995, "kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": 1e-4, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 160000, "num_workers": 64}
config: {"gamma": 0.995, "kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 160000, "num_workers": 64}
@@ -7,5 +7,5 @@ humanoid-ppo-gae:
resources:
cpu: 64
gpu: 4
config: {"lambda": 0.95, "clip_param": 0.2, "kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": 1e-4, "sgd_batchsize": 32768, "horizon": 5000, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64, "model": {"free_log_std": true}, "write_logs": false}
config: {"lambda": 0.95, "clip_param": 0.2, "kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "horizon": 5000, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64, "model": {"free_log_std": true}, "write_logs": false}
@@ -7,4 +7,4 @@ humanoid-ppo:
resources:
cpu: 64
gpu: 4
config: {"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": 1e-4, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64, "model": {"free_log_std": true}, "use_gae": false}
config: {"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64, "model": {"free_log_std": true}, "use_gae": false}
@@ -5,4 +5,4 @@ walker2d-v1-ppo:
resources:
cpu: 64
gpu: 4
config: {"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": 1e-4, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64}
config: {"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64}
+113 -26
View File
@@ -1,24 +1,123 @@
Ray.tune: Fast hyperparameter search
====================================
Parallel hyperparameter evaluation with Ray
===========================================
Using ray.tune with RLlib
-------------------------
Using ray.tune for deep neural network training
-----------------------------------------------
One way to use ray.tune is through RLlib's train.py script. The train.py script
supports two modes. For example, to run multiple concurrent trials of Pong:
With only a couple changes, you can parallelize evaluation of any existing
Python script with Ray.tune.
- Inline args: ``./train.py --env=Pong-v0 --alg=PPO --num_trials=8 --stop '{"time_total_s": 3200}' --resources '{"cpu": 8, "gpu": 2}' --config '{"num_workers": 8, "sgd_num_iter": 10}'``
First, you must define a ``train(config, status_reporter)`` function in your
script. This will be the entry point which Ray will call into.
- File-based: ``./train.py -f tune-pong.yaml``
.. code:: python
Both delegate scheduling of trials to the ray.tune TrialRunner class.
Additionally, the file-based mode supports hyper-parameter tuning
(currently just grid and random search).
def train(config, status_reporter):
pass
To specify search parameters, variables in the `config` section may be set to
different values for each trial. You can either specify `grid_search: <list>`
Second, you should periodically report training status by passing a
``TrainingResult`` tuple to ``status_reporter.report()``.
.. code:: python
from ray.tune.result import TrainingResult
def train(config, status_reporter):
for step in range(1000):
# do a training iteration
status_reporter.report(TrainingResult(
timesteps_total=step, # required
mean_loss=train_loss, # optional
mean_accuracy=train_accuracy # optional
))
You can then launch a hyperparameter tuning run by running ``tune.py``.
For example:
.. code:: bash
cd python/ray/tune
./tune.py -f examples/tune_mnist_ray.yaml
The YAML or JSON file passed to ``tune.py`` specifies the configuration of the
trials to launch. For example, the following YAML describes a grid search over
activation functions.
.. code:: yaml
tune_mnist:
env: mnist
alg: script
num_trials: 10
resources:
cpu: 1
stop:
mean_accuracy: 0.99
time_total_s: 600
config:
script_file_path: examples/tune_mnist_ray.py
script_entrypoint: train
activation:
grid_search: ['relu', 'elu', 'tanh']
When run, ``./tune.py`` will schedule the trials on Ray, creating a new local
Ray cluster if an existing cluster address is not specified. Incremental
status will be reported on the command line, and you can also view the reported
metrics using TensorBoard:
.. code:: text
== Status ==
Resources used: 4/4 CPUs, 0/0 GPUs
Tensorboard logdir: /tmp/ray/tune_mnist
- script_mnist_0_activation=relu: RUNNING [pid=27708], 16 s, 20 ts, 0.46 acc
- script_mnist_1_activation=elu: RUNNING [pid=27709], 16 s, 20 ts, 0.54 acc
- script_mnist_2_activation=tanh: RUNNING [pid=27711], 18 s, 20 ts, 0.74 acc
- script_mnist_3_activation=relu: RUNNING [pid=27713], 12 s, 10 ts, 0.22 acc
- script_mnist_4_activation=elu: PENDING
- script_mnist_5_activation=tanh: PENDING
- script_mnist_6_activation=relu: PENDING
- script_mnist_7_activation=elu: PENDING
- script_mnist_8_activation=tanh: PENDING
- script_mnist_9_activation=relu: PENDING
Note that if your script requires GPUs, you should specify the number of GPUs
required per trial in the ``resources`` section. Additionally, Ray should be
initialized with the ``--num-gpus`` argument (you can also pass this argument
to ``tune.py``).
Using ray.tune as a library
---------------------------
Ray.tune can also be called programmatically from Python code. This allows for
finer-grained control over trial setup and scheduling. Some examples of
calling ray.tune programmatically include:
- ``python/ray/tune/examples/tune_mnist_ray.py``
- ``python/ray/rllib/train.py``
Using ray.tune with Ray RLlib
-----------------------------
Another way to use ray.tune is through RLlib's ``python/ray/rllib/train.py``
script. This script allows you to select between different RL algorithms with
the ``--alg`` option. For example, to train Pong with the A3C algorithm, run:
- ``./train.py --env=PongDeterministic-v4 --alg=A3C --num-trials=8 --stop '{"time_total_s": 3200}' --resources '{"cpu": 8}' --config '{"num_workers": 8}'``
or
- ``./train.py -f tuned_examples/pong-a3c.yaml``
You can find more RLlib examples in ``python/ray/rllib/tuned_examples``.
Specifying search parameters
----------------------------
To specify search parameters, variables in the ``config`` section may be set to
different values for each trial. You can either specify ``grid_search: <list>``
in place of a concrete value to specify a grid search across the list of
values, or `eval: <str>` for values to be sampled from the given Python
values, or ``eval: <str>`` for values to be sampled from the given Python
expression.
.. code:: yaml
@@ -40,15 +139,3 @@ expression.
grid_search: [128, 256, 512]
lr:
eval: random.uniform(1e-4, 1e-3)
See ray/rllib/tuned_examples for more examples of configs in YAML form.
Using ray.tune to run custom scripts
------------------------------------
TODO
Using ray.tune as a library
---------------------------
TODO
+5 -6
View File
@@ -8,7 +8,6 @@ import json
import numpy as np
import os
import random
import sys
from ray.tune.trial import Trial, Resources
@@ -32,13 +31,13 @@ def make_parser(description):
parser.add_argument("--resources", default='{"cpu": 1}',
type=_resource_json,
help="Amount of resources to allocate per trial.")
parser.add_argument("--num_trials", default=1, type=int,
parser.add_argument("--num-trials", default=1, type=int,
help="Number of trials to evaluate.")
parser.add_argument("--local_dir", default="/tmp/ray", type=str,
parser.add_argument("--local-dir", default="/tmp/ray", type=str,
help="Local dir to save training results to.")
parser.add_argument("--upload_dir", default=None, type=str,
parser.add_argument("--upload-dir", default=None, type=str,
help="URI to upload training results to.")
parser.add_argument("--checkpoint_freq", default=sys.maxsize, type=int,
parser.add_argument("--checkpoint-freq", default=None, type=int,
help="How many iterations between checkpoints.")
# TODO(ekl) environments are RL specific
@@ -73,7 +72,7 @@ def parse_to_trials(config):
def to_argv(config):
argv = []
for k, v in config.items():
argv.append("--{}".format(k))
argv.append("--{}".format(k.replace("_", "-")))
if type(v) is str:
argv.append(v)
else:
+223
View File
@@ -0,0 +1,223 @@
#!/usr/bin/env python
#
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.
See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
import tempfile
import os
import ray
from ray.tune.result import TrainingResult
from ray.tune.trial import Trial
from ray.tune.trial_runner import TrialRunner
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
FLAGS = None
status_reporter = None # used to report training status back to Ray
activation_fn = None # e.g. tf.nn.relu
def deepnn(x):
    """Build the convolutional MNIST classifier graph on top of `x`.

    The nonlinearity applied after each hidden layer is the module-level
    `activation_fn`, which must be set before this function is called.

    Args:
        x: an input tensor with the dimensions (N_examples, 784), where 784
            is the number of pixels in a standard MNIST image.

    Returns:
        A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10) holding
        the logits for the ten digit classes (0-9). keep_prob is a scalar
        placeholder for the dropout keep probability.
    """
    # Side length 28, one channel (grayscale); -1 lets the batch size vary.
    with tf.name_scope('reshape'):
        images = tf.reshape(x, [-1, 28, 28, 1])

    # Conv layer 1: 5x5 kernels, 1 input channel -> 32 feature maps.
    with tf.name_scope('conv1'):
        conv1_weights = weight_variable([5, 5, 1, 32])
        conv1_bias = bias_variable([32])
        conv1_out = activation_fn(conv2d(images, conv1_weights) + conv1_bias)

    # Pool 1: halve the spatial resolution.
    with tf.name_scope('pool1'):
        pool1_out = max_pool_2x2(conv1_out)

    # Conv layer 2: 5x5 kernels, 32 -> 64 feature maps.
    with tf.name_scope('conv2'):
        conv2_weights = weight_variable([5, 5, 32, 64])
        conv2_bias = bias_variable([64])
        conv2_out = activation_fn(
            conv2d(pool1_out, conv2_weights) + conv2_bias)

    # Pool 2: halve the spatial resolution again.
    with tf.name_scope('pool2'):
        pool2_out = max_pool_2x2(conv2_out)

    # Dense layer 1: two 2x poolings leave 7x7x64 values per image, which are
    # flattened and mapped to 1024 features.
    with tf.name_scope('fc1'):
        dense1_weights = weight_variable([7 * 7 * 64, 1024])
        dense1_bias = bias_variable([1024])
        flattened = tf.reshape(pool2_out, [-1, 7*7*64])
        dense1_out = activation_fn(
            tf.matmul(flattened, dense1_weights) + dense1_bias)

    # Dropout: the keep probability is fed at run time via the placeholder.
    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        dropped = tf.nn.dropout(dense1_out, keep_prob)

    # Dense layer 2: 1024 features -> 10 class logits.
    with tf.name_scope('fc2'):
        dense2_weights = weight_variable([1024, 10])
        dense2_bias = bias_variable([10])
        logits = tf.matmul(dropped, dense2_weights) + dense2_bias

    return logits, keep_prob
def conv2d(x, W):
    """Convolve `x` with kernel `W` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
def weight_variable(shape):
    """Create a variable of `shape` initialized from a truncated normal
    distribution with standard deviation 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a variable of `shape` with every entry initialized to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def main(_):
    """Build the MNIST graph, train it, and report progress.

    Reads the data directory from the module-level `FLAGS` and, when the
    module-level `status_reporter` is set, reports the training accuracy
    through it every 10 steps.

    Args:
        _: Unused positional argument (this function is handed to
            `tf.app.run` as its `main` callable).
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_, logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    # Write the graph definition to a fresh temporary directory.
    graph_location = tempfile.mkdtemp()
    print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            batch = mnist.train.next_batch(50)
            if i % 10 == 0:
                # Evaluate on the current batch with dropout disabled
                # (keep_prob fed as 1.0).
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0})

                # !!! Report status to ray.tune !!!
                # The step index is reported as the accumulated timestep
                # count; the reporter is None when no reporter was supplied.
                if status_reporter:
                    status_reporter.report(TrainingResult(
                        timesteps_total=i,
                        mean_accuracy=train_accuracy))

                print('step %d, training accuracy %g' % (i, train_accuracy))
            # One optimizer step with dropout keep probability 0.5.
            train_step.run(
                feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
# !!! Entrypoint for ray.tune !!!
def train(config=None, reporter=None):
    """Configure the module-level training state and run `main`.

    Args:
        config (dict): Trial configuration. Must contain an 'activation' key
            naming a function in `tf.nn` (e.g. 'relu'). Defaults to
            {'activation': 'relu'} when omitted.
        reporter: Object whose `report(result)` method receives training
            results, or None to train without reporting.
    """
    global FLAGS, status_reporter, activation_fn

    # Build the default per call: a dict literal in the signature would be a
    # single object shared (and mutable) across every call.
    if config is None:
        config = {'activation': 'relu'}

    status_reporter = reporter
    activation_fn = getattr(tf.nn, config['activation'])

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
        help='Directory for storing input data')
    # Arguments this parser does not recognize are forwarded to tf.app.run.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
# !!! Example of using the ray.tune Python API !!!
if __name__ == '__main__':
    runner = TrialRunner()

    # Queue one trial per activation function, all running this same file
    # through the 'script' agent.
    for act in ['relu', 'elu', 'tanh']:
        trial_config = {
            'script_file_path': os.path.abspath(__file__),
            'script_min_iter_time_s': 1,
            'activation': act,
        }
        stop_when = {'mean_accuracy': 0.99, 'time_total_s': 600}
        trial = Trial(
            'mnist', 'script',
            stopping_criterion=stop_when,
            config=trial_config,
            agent_id='act={}'.format(act))
        runner.add_trial(trial)

    ray.init()

    # Drive the runner until every trial is done, printing status each step.
    while not runner.is_finished():
        runner.step()
        print(runner.debug_string())
@@ -0,0 +1,15 @@
tune_mnist:
env: mnist
alg: script
num_trials: 10
resources:
cpu: 1
stop:
mean_accuracy: 0.99
time_total_s: 600
config:
script_file_path: examples/tune_mnist_ray.py
script_entrypoint: train
script_min_iter_time_s: 1
activation:
grid_search: ['relu', 'elu', 'tanh']
+62
View File
@@ -0,0 +1,62 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import namedtuple
"""
When using ray.tune with custom training scripts, you must periodically report
training status back to Ray by calling status_reporter.report(result).
Most of the fields are optional, the only required one is timesteps_total.
In RLlib, the supplied algorithms fill in TrainingResult for you.
"""
# Record describing training progress. Fields marked (Auto-filled) are
# populated by the agent running the training; the remaining fields are
# supplied by whoever builds the result.
TrainingResult = namedtuple("TrainingResult", [
    # (Required) Accumulated timesteps for this entire experiment.
    "timesteps_total",

    # (Optional) Custom metadata to report for this iteration.
    "info",

    # (Optional) The mean episode reward if applicable.
    "episode_reward_mean",

    # (Optional) The mean episode length if applicable.
    "episode_len_mean",

    # (Optional) The current training accuracy if applicable.
    "mean_accuracy",

    # (Optional) The current training loss if applicable.
    "mean_loss",

    # (Auto-filled) The negated current training loss.
    "neg_mean_loss",

    # (Auto-filled) Unique string identifier for this experiment. This id is
    # preserved across checkpoint / restore calls.
    "experiment_id",

    # (Auto-filled) The index of this training iteration, e.g. call to train().
    "training_iteration",

    # (Auto-filled) Number of timesteps in the simulator in this iteration.
    "timesteps_this_iter",

    # (Auto-filled) Time in seconds this iteration took to run.
    "time_this_iter_s",

    # (Auto-filled) Accumulated time in seconds for this entire experiment.
    "time_total_s",

    # (Auto-filled) The pid of the training process.
    "pid",

    # (Auto-filled) The hostname of the machine hosting the training process.
    "hostname",
])

# Default every field to None so that any subset of fields can be passed.
TrainingResult.__new__.__defaults__ = (None,) * len(TrainingResult._fields)
+162
View File
@@ -0,0 +1,162 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import importlib
import os
import sys
import time
import threading
from ray.rllib.agent import Agent
class StatusReporter(object):
    """Object passed into your entrypoint that you can report status through.

    Only the most recent result is kept: a new `report()` call overwrites any
    result that has not been collected yet. Access to the stored result is
    guarded by a lock.
    """

    def __init__(self):
        self._latest_result = None
        self._lock = threading.Lock()
        self._error = None

    def report(self, result):
        """Report updated training status.

        Args:
            result (TrainingResult): Latest training result status. You must
                at least define `timesteps_total`, but probably want to report
                some of the other metrics as well.
        """

        with self._lock:
            self._latest_result = result

    def set_error(self, error):
        """Report an error.

        Args:
            error (obj): Error object or string.
        """

        self._error = error

    def _get_and_clear_status(self):
        """Return the latest uncollected result, or None if there is none.

        Raises:
            Exception: If an error was reported via `set_error()` or the
                reporter was stopped via `_stop()`.
        """

        # Compare against None rather than relying on truthiness, so that a
        # falsy error value (e.g. an empty string) is still surfaced.
        if self._error is not None:
            raise Exception("Error running script: " + str(self._error))
        with self._lock:
            res = self._latest_result
            self._latest_result = None
        return res

    def _stop(self):
        """Mark the reporter as stopped; later status polls will raise."""

        self._error = "Agent stopped"
# Default configuration for the script-running agent.
DEFAULT_CONFIG = {
    # Path of the script to run. The value below is a placeholder.
    "script_file_path": "/path/to/file.py",
    # Name of the train function in the file, e.g.
    # train(config, status_reporter).
    "script_entrypoint": "train",
    # Batch results to at least this granularity, in seconds.
    "script_min_iter_time_s": 5,
}
class _RunnerThread(threading.Thread):
    """Supervisor thread that runs your script.

    The entrypoint is invoked as `entrypoint(config, status_reporter)`. The
    thread is a daemon thread.
    """

    def __init__(self, entrypoint, config, status_reporter):
        """
        Args:
            entrypoint (callable): Function to run in this thread.
            config (dict): Passed to the entrypoint as its first argument.
            status_reporter: Passed to the entrypoint as its second argument;
                also receives any exception the entrypoint raises, through
                its `set_error()` method.
        """
        self._entrypoint = entrypoint
        self._entrypoint_args = [config, status_reporter]
        self._status_reporter = status_reporter
        threading.Thread.__init__(self)
        self.daemon = True

    def run(self):
        """Run the entrypoint, recording any exception on the reporter."""
        try:
            self._entrypoint(*self._entrypoint_args)
        except Exception as e:
            self._status_reporter.set_error(e)
            # Bare `raise` re-raises with the original traceback; on Python 2
            # `raise e` would restart the traceback at this line.
            raise
class ScriptRunner(Agent):
    """Agent that runs a user script returning training results.

    The file named by ``script_file_path`` is loaded as a module and its
    ``script_entrypoint`` function is run in a background thread with
    ``(config, status_reporter)``. Each call to train() polls the
    StatusReporter for the most recently reported result.
    """

    # NOTE(review): presumably consumed by the Agent base class, which is not
    # visible from this file -- confirm.
    _agent_name = "script"
    _default_config = DEFAULT_CONFIG
    _allow_unknown_configs = True

    def _init(self):
        """Load the user script and start running its entrypoint.

        Raises:
            Exception: If the script file cannot be imported.
            AttributeError: If the module has no attribute named by
                ``script_entrypoint``.
        """
        # strong assumption here that we're in a new process
        file_path = os.path.expanduser(self.config["script_file_path"])
        # Let the script import sibling modules from its own directory.
        sys.path.insert(0, os.path.dirname(file_path))

        # `import importlib` does not guarantee that the `util` submodule is
        # loaded, so import it explicitly instead of probing with hasattr().
        try:
            from importlib import util as importlib_util
        except ImportError:
            importlib_util = None  # Python 2.x

        if hasattr(importlib_util, "module_from_spec"):
            # Python 3.5+ (module_from_spec does not exist before 3.5)
            spec = importlib_util.spec_from_file_location(
                "external_file", file_path)
            # spec_from_file_location returns None when no loader can be
            # determined for the path (e.g. unrecognized file suffix).
            if spec is None or spec.loader is None:
                raise Exception(
                    "Unable to import file at {}".format(
                        self.config["script_file_path"]))
            external_file = importlib_util.module_from_spec(spec)
            spec.loader.exec_module(external_file)
        else:
            try:
                # Python 3.3 / 3.4
                from importlib.machinery import SourceFileLoader
            except ImportError:
                # Python 2.x
                import imp
                external_file = imp.load_source("external_file", file_path)
            else:
                external_file = SourceFileLoader(
                    "external_file", file_path).load_module()

        if not external_file:
            raise Exception(
                "Unable to import file at {}".format(
                    self.config["script_file_path"]))

        entrypoint = getattr(external_file, self.config["script_entrypoint"])
        self._status_reporter = StatusReporter()
        self._runner = _RunnerThread(
            entrypoint, self.config, self._status_reporter)
        self._start_time = time.time()
        self._last_reported_time = self._start_time
        self._last_reported_timestep = 0
        self._runner.start()

    def train(self):
        """Block until the script has reported a result, then return it.

        Waits at least ``script_min_iter_time_s`` seconds and until at least
        one result is available; if several results arrive in that window
        only the most recent one is returned.

        Returns:
            The reported result with bookkeeping fields (iteration, timing,
            pid, hostname, ...) filled in.

        Raises:
            Exception: If the script reported an error or the agent was
                stopped.
            ValueError: If the reported result has no `timesteps_total`.
        """
        min_iter_time = self.config["script_min_iter_time_s"]
        poll_start = time.time()
        result = self._status_reporter._get_and_clear_status()
        # NOTE(review): if the script returns without reporting a new result
        # (and without raising), this loop never exits.
        while result is None or time.time() - poll_start < min_iter_time:
            time.sleep(1)
            latest = self._status_reporter._get_and_clear_status()
            # Polling clears the reporter, so keep what we already have when
            # nothing newer arrived instead of overwriting it with None.
            if latest is not None:
                result = latest

        if result.timesteps_total is None:
            raise ValueError(
                "Must specify timesteps_total in result: {}".format(result))

        now = time.time()

        # Include the negative loss to use as a stopping condition
        if result.mean_loss is not None:
            neg_loss = -result.mean_loss
        else:
            neg_loss = result.neg_mean_loss

        result = result._replace(
            experiment_id=self._experiment_id,
            neg_mean_loss=neg_loss,
            training_iteration=self.iteration,
            time_this_iter_s=now - self._last_reported_time,
            timesteps_this_iter=(
                result.timesteps_total - self._last_reported_timestep),
            time_total_s=now - self._start_time,
            pid=os.getpid(),
            hostname=os.uname()[1])

        if result.timesteps_total:
            self._last_reported_timestep = result.timesteps_total
        self._last_reported_time = now
        self._iteration += 1

        self._log_result(result)

        return result

    def stop(self):
        """Flag the reporter as stopped, then run the base-class stop()."""
        self._status_reporter._stop()
        Agent.stop(self)
+35 -13
View File
@@ -2,12 +2,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import traceback
import ray
import os
from collections import namedtuple
from ray.rllib.agents import get_agent_class
from ray.rllib.agent import get_agent_class
# Ray resources required to schedule a Trial
@@ -24,15 +24,15 @@ class Trial(object):
On error it transitions to ERROR, otherwise TERMINATED on success.
"""
PENDING = 'PENDING'
RUNNING = 'RUNNING'
TERMINATED = 'TERMINATED'
ERROR = 'ERROR'
PENDING = "PENDING"
RUNNING = "RUNNING"
TERMINATED = "TERMINATED"
ERROR = "ERROR"
def __init__(
self, env_creator, alg, config={}, local_dir='/tmp/ray',
agent_id=None, resources=Resources(cpu=1, gpu=0),
stopping_criterion={}, checkpoint_freq=sys.maxsize,
stopping_criterion={}, checkpoint_freq=None,
restore_path=None, upload_dir=None):
"""Initialize a new trial.
@@ -61,6 +61,7 @@ class Trial(object):
self.checkpoint_path = None
self.agent = None
self.status = Trial.PENDING
self.location = None
def start(self):
"""Starts this trial.
@@ -135,12 +136,33 @@ class Trial(object):
if self.last_result is None:
return self.status
return '{}, {} s, {} ts, {} itrs, {} rew'.format(
self.status,
int(self.last_result.time_total_s),
int(self.last_result.timesteps_total),
self.last_result.training_iteration,
round(self.last_result.episode_reward_mean, 1))
def location_string(hostname, pid):
if hostname == os.uname()[1]:
return 'pid={}'.format(pid)
else:
return '{} pid={}'.format(hostname, pid)
pieces = [
'{} [{}]'.format(
self.status, location_string(
self.last_result.hostname, self.last_result.pid)),
'{} s'.format(int(self.last_result.time_total_s)),
'{} ts'.format(int(self.last_result.timesteps_total))]
if self.last_result.episode_reward_mean is not None:
pieces.append('{} rew'.format(
format(self.last_result.episode_reward_mean, '.3g')))
if self.last_result.mean_loss is not None:
pieces.append('{} loss'.format(
format(self.last_result.mean_loss, '.3g')))
if self.last_result.mean_accuracy is not None:
pieces.append('{} acc'.format(
format(self.last_result.mean_accuracy, '.3g')))
return ', '.join(pieces)
def checkpoint(self):
"""Synchronously checkpoints the state of this trial.
+6 -4
View File
@@ -63,7 +63,7 @@ class TrialRunner(object):
if trial.status == Trial.PENDING:
assert self._has_resources(trial.resources), \
("Insufficient cluster resources to launch trial",
trial.resources)
(trial.resources, self._avail_resources))
assert False, "Called step when all trials finished?"
def get_trials(self):
@@ -87,9 +87,11 @@ class TrialRunner(object):
messages = ["== Status =="]
messages.append(
"Available: {}".format(self._avail_resources))
messages.append(
"Committed: {}".format(self._committed_resources))
"Resources used: {}/{} CPUs, {}/{} GPUs".format(
self._committed_resources.cpu,
self._avail_resources.cpu,
self._committed_resources.gpu,
self._avail_resources.gpu))
for local_dir in sorted(set([t.local_dir for t in self._trials])):
messages.append("Tensorboard logdir: {}".format(local_dir))
for t in self._trials:
+14
View File
@@ -0,0 +1,14 @@
#!/usr/bin/env python
"""Command-line tool for tuning hyperparameters with Ray.

MNIST tuning example:
    ./tune.py -f examples/tune_mnist_ray.yaml
"""

import sys

from ray.rllib import train

# TODO(ekl) right now this is a thin wrapper around the rllib training script,
# however in the future we should have a separate command line tool here.
if __name__ == "__main__":
    # Only dispatch when executed as a script, so that importing this module
    # does not start a training run. The command-line arguments are forwarded
    # with `--alg=script` appended.
    train.main(sys.argv[1:] + ['--alg=script'])