[rllib] Standardize writing output logs and other files to /tmp/ray (#706)

* rllib v0

* fix imports

* lint

* comments

* update docs

* a3c wip

* a3c wip

* report stats

* update doc

* add common logdir attr

* name is too long

* fix small bug

* propagate exception on error

* fetch metrics

* fix small nits
This commit is contained in:
Eric Liang
2017-07-03 09:01:47 -07:00
committed by Philipp Moritz
parent 2b11a7bca2
commit 66734847bb
6 changed files with 43 additions and 20 deletions
+3 -2
View File
@@ -26,7 +26,7 @@ class Runner(object):
The gradient computation is also executed from this object.
"""
def __init__(self, env_name, actor_id, logdir="/tmp/ray/a3c/", start=True):
def __init__(self, env_name, actor_id, logdir, start=True):
env = create_env(env_name)
self.id = actor_id
num_actions = env.action_space.n
@@ -89,7 +89,8 @@ class A3C(Algorithm):
self.policy = LSTMPolicy(
self.env.observation_space.shape, self.env.action_space.n, 0)
self.agents = [
Runner.remote(env_name, i) for i in range(config["num_workers"])]
Runner.remote(env_name, i, self.logdir)
for i in range(config["num_workers"])]
self.parameters = self.policy.get_weights()
self.iteration = 0
+26 -2
View File
@@ -1,4 +1,12 @@
from collections import namedtuple
from datetime import datetime
import json
import logging
import os
import tempfile
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
TrainingResult = namedtuple("TrainingResult", [
@@ -14,16 +22,32 @@ class Algorithm(object):
Algorithm objects retain internal model state between calls to train(), so
you should create a new algorithm instance for each training session.
Attributes:
env_name (str): Name of the OpenAI gym environment to train against.
config (obj): Algorithm-specific configuration data.
logdir (str): Directory in which training outputs should be placed.
TODO(ekl): support checkpoint / restore of training state.
"""
def __init__(self, env_name, config):
    """Stores the run settings and creates a fresh log directory.

    A uniquely named directory is created under /tmp/ray, the config is
    written there as config.json, and the directory path is exposed as
    self.logdir.

    Args:
        env_name (str): Name of the OpenAI gym environment to train
            against.
        config (obj): Algorithm-specific configuration data. It must be
            JSON-serializable because it is dumped to config.json.
    """
    self.env_name = env_name
    self.config = config

    # mkdtemp() does not create its parent directory, so make sure the
    # shared root exists first. The try/except form (instead of
    # exist_ok=True) keeps this working on Python 2 as well.
    log_root = "/tmp/ray"
    try:
        os.makedirs(log_root)
    except OSError:
        if not os.path.isdir(log_root):
            raise

    self.logdir = tempfile.mkdtemp(
        prefix="{}_{}_{}".format(
            env_name,
            self.__class__.__name__,
            datetime.today().strftime("%Y-%m-%d_%H-%M-%S")),
        dir=log_root)

    # Use a context manager so the config file is flushed and closed
    # deterministically rather than whenever the handle is collected.
    config_path = os.path.join(self.logdir, "config.json")
    with open(config_path, "w") as config_file:
        json.dump(self.config, config_file, sort_keys=True, indent=4)

    logger.info(
        "%s algorithm created with logdir '%s'",
        self.__class__.__name__, self.logdir)
def train(self):
"""
Runs one logical iteration of training.
"""Runs one logical iteration of training.
Returns:
A TrainingResult that describes training progress.
@@ -277,7 +277,7 @@ class EvolutionStrategies(Algorithm):
if (config.snapshot_freq != 0 and
self.iteration % config.snapshot_freq == 0):
filename = os.path.join(
"/tmp", "snapshot_iter{:05d}.h5".format(self.iteration))
self.logdir, "snapshot_iter{:05d}.h5".format(self.iteration))
assert not os.path.exists(filename)
self.policy.save(filename)
tlogger.log("Saved snapshot {}".format(filename))
+4 -3
View File
@@ -48,7 +48,7 @@ class Agent(object):
this GPU-local data.
"""
def __init__(self, name, batchsize, preprocessor, config, is_remote):
def __init__(self, name, batchsize, preprocessor, config, logdir, is_remote):
if is_remote:
os.environ["CUDA_VISIBLE_DEVICES"] = ""
devices = ["/cpu:0"]
@@ -56,6 +56,7 @@ class Agent(object):
devices = config["devices"]
self.devices = devices
self.config = config
self.logdir = logdir
self.env = BatchedEnv(name, batchsize, preprocessor=preprocessor)
if preprocessor.shape is None:
preprocessor.shape = self.env.observation_space.shape
@@ -220,7 +221,7 @@ class Agent(object):
run_metadata=run_metadata)
if full_trace:
trace = timeline.Timeline(step_stats=run_metadata.step_stats)
trace_file = open("/tmp/ray/timeline-load.json", "w")
trace_file = open(os.path.join(self.logdir, "timeline-load.json"), "w")
trace_file.write(trace.generate_chrome_trace_format())
tuples_per_device = len(truncated_obs) / len(self.devices)
@@ -254,7 +255,7 @@ class Agent(object):
if full_trace:
trace = timeline.Timeline(step_stats=run_metadata.step_stats)
trace_file = open("/tmp/ray/timeline-sgd.json", "w")
trace_file = open(os.path.join(self.logdir, "timeline-sgd.json"), "w")
trace_file.write(trace.generate_chrome_trace_format())
file_writer.add_run_metadata(
run_metadata, "sgd_train_{}".format(batch_index))
@@ -2,7 +2,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
import os
import time
import numpy as np
@@ -36,11 +36,10 @@ DEFAULT_CONFIG = {
"kl_target": 0.01,
"timesteps_per_batch": 40000,
"num_agents": 5,
"tensorboard_log_dir": "/tmp/ray",
"full_trace_nth_sgd_batch": -1,
"full_trace_data_load": False,
"use_tf_debugger": False,
"model_checkpoint_file": "/tmp/iteration-%s.ckpt"}
"model_checkpoint_file": "iteration-%s.ckpt"}
class PolicyGradient(Algorithm):
@@ -64,10 +63,11 @@ class PolicyGradient(Algorithm):
self.j = 0
self.kl_coeff = config["kl_coeff"]
self.model = Agent(
self.env_name, 1, self.preprocessor, self.config, False)
self.env_name, 1, self.preprocessor, self.config, self.logdir, False)
self.agents = [
RemoteAgent.remote(
self.env_name, 1, self.preprocessor, self.config, True)
self.env_name, 1, self.preprocessor, self.config,
self.logdir, True)
for _ in range(config["num_agents"])]
def train(self):
@@ -81,15 +81,12 @@ class PolicyGradient(Algorithm):
if "load_checkpoint" in config:
saver.restore(model.sess, config["load_checkpoint"])
file_writer = tf.summary.FileWriter(
"{}/trpo_{}_{}".format(
config["tensorboard_log_dir"], self.env_name,
str(datetime.today()).replace(" ", "_")),
model.sess.graph)
file_writer = tf.summary.FileWriter(self.logdir, model.sess.graph)
iter_start = time.time()
if config["model_checkpoint_file"]:
checkpoint_path = saver.save(
model.sess, config["model_checkpoint_file"] % j)
model.sess,
os.path.join(self.logdir, config["model_checkpoint_file"] % j))
print("Checkpoint saved in file: %s" % checkpoint_path)
checkpointing_end = time.time()
weights = ray.put(model.get_weights())