mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 08:46:01 +08:00
[rllib] Standardize writing output logs and other files to /tmp/ray (#706)
* rllib v0 * fix imports * lint * comments * update docs * a3c wip * a3c wip * report stats * update doc * add common logdir attr * name is too long * fix small bug * propagate exception on error * fetch metrics * fix small nits
This commit is contained in:
committed by
Philipp Moritz
parent
2b11a7bca2
commit
66734847bb
@@ -26,7 +26,7 @@ class Runner(object):
|
||||
|
||||
The gradient computation is also executed from this object.
|
||||
"""
|
||||
def __init__(self, env_name, actor_id, logdir="/tmp/ray/a3c/", start=True):
|
||||
def __init__(self, env_name, actor_id, logdir, start=True):
|
||||
env = create_env(env_name)
|
||||
self.id = actor_id
|
||||
num_actions = env.action_space.n
|
||||
@@ -89,7 +89,8 @@ class A3C(Algorithm):
|
||||
self.policy = LSTMPolicy(
|
||||
self.env.observation_space.shape, self.env.action_space.n, 0)
|
||||
self.agents = [
|
||||
Runner.remote(env_name, i) for i in range(config["num_workers"])]
|
||||
Runner.remote(env_name, i, self.logdir)
|
||||
for i in range(config["num_workers"])]
|
||||
self.parameters = self.policy.get_weights()
|
||||
self.iteration = 0
|
||||
|
||||
|
||||
@@ -1,4 +1,12 @@
|
||||
from collections import namedtuple
|
||||
from datetime import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
TrainingResult = namedtuple("TrainingResult", [
|
||||
@@ -14,16 +22,32 @@ class Algorithm(object):
|
||||
Algorithm objects retain internal model state between calls to train(), so
|
||||
you should create a new algorithm instance for each training session.
|
||||
|
||||
Attributes:
|
||||
env_name (str): Name of the OpenAI gym environment to train against.
|
||||
config (obj): Algorithm-specific configuration data.
|
||||
logdir (str): Directory in which training outputs should be placed.
|
||||
|
||||
TODO(ekl): support checkpoint / restore of training state.
|
||||
"""
|
||||
|
||||
def __init__(self, env_name, config):
    """Set up common algorithm state and create the output directory.

    A fresh directory is created under /tmp/ray, named after the
    environment, the concrete algorithm class and the current timestamp.
    The configuration is written into it as config.json.

    Args:
        env_name (str): Name of the OpenAI gym environment to train
            against.
        config (obj): Algorithm-specific configuration data. It is dumped
            with json.dump, so it must be JSON-serializable.

    Raises:
        OSError: If the base directory or the log directory cannot be
            created.
    """
    self.env_name = env_name
    self.config = config

    # tempfile.mkdtemp() does not create missing parent directories, so
    # make sure the base directory exists before asking for a subdirectory.
    base_dir = "/tmp/ray"
    try:
        os.makedirs(base_dir)
    except OSError:
        # The directory may already exist (possibly created concurrently
        # by another process); only propagate genuine failures.
        if not os.path.isdir(base_dir):
            raise

    self.logdir = tempfile.mkdtemp(
        prefix="{}_{}_{}".format(
            env_name,
            self.__class__.__name__,
            datetime.today().strftime("%Y-%m-%d_%H-%M-%S")),
        dir=base_dir)

    # Use a context manager so config.json is flushed and closed right
    # away instead of whenever the file object happens to be collected.
    config_path = os.path.join(self.logdir, "config.json")
    with open(config_path, "w") as config_file:
        json.dump(self.config, config_file, sort_keys=True, indent=4)

    logger.info(
        "%s algorithm created with logdir '%s'",
        self.__class__.__name__, self.logdir)
|
||||
|
||||
def train(self):
|
||||
"""
|
||||
Runs one logical iteration of training.
|
||||
"""Runs one logical iteration of training.
|
||||
|
||||
Returns:
|
||||
A TrainingResult that describes training progress.
|
||||
|
||||
@@ -277,7 +277,7 @@ class EvolutionStrategies(Algorithm):
|
||||
if (config.snapshot_freq != 0 and
|
||||
self.iteration % config.snapshot_freq == 0):
|
||||
filename = os.path.join(
|
||||
"/tmp", "snapshot_iter{:05d}.h5".format(self.iteration))
|
||||
self.logdir, "snapshot_iter{:05d}.h5".format(self.iteration))
|
||||
assert not os.path.exists(filename)
|
||||
self.policy.save(filename)
|
||||
tlogger.log("Saved snapshot {}".format(filename))
|
||||
|
||||
@@ -48,7 +48,7 @@ class Agent(object):
|
||||
this GPU-local data.
|
||||
"""
|
||||
|
||||
def __init__(self, name, batchsize, preprocessor, config, is_remote):
|
||||
def __init__(self, name, batchsize, preprocessor, config, logdir, is_remote):
|
||||
if is_remote:
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
||||
devices = ["/cpu:0"]
|
||||
@@ -56,6 +56,7 @@ class Agent(object):
|
||||
devices = config["devices"]
|
||||
self.devices = devices
|
||||
self.config = config
|
||||
self.logdir = logdir
|
||||
self.env = BatchedEnv(name, batchsize, preprocessor=preprocessor)
|
||||
if preprocessor.shape is None:
|
||||
preprocessor.shape = self.env.observation_space.shape
|
||||
@@ -220,7 +221,7 @@ class Agent(object):
|
||||
run_metadata=run_metadata)
|
||||
if full_trace:
|
||||
trace = timeline.Timeline(step_stats=run_metadata.step_stats)
|
||||
trace_file = open("/tmp/ray/timeline-load.json", "w")
|
||||
trace_file = open(os.path.join(self.logdir, "timeline-load.json"), "w")
|
||||
trace_file.write(trace.generate_chrome_trace_format())
|
||||
|
||||
tuples_per_device = len(truncated_obs) / len(self.devices)
|
||||
@@ -254,7 +255,7 @@ class Agent(object):
|
||||
|
||||
if full_trace:
|
||||
trace = timeline.Timeline(step_stats=run_metadata.step_stats)
|
||||
trace_file = open("/tmp/ray/timeline-sgd.json", "w")
|
||||
trace_file = open(os.path.join(self.logdir, "timeline-sgd.json"), "w")
|
||||
trace_file.write(trace.generate_chrome_trace_format())
|
||||
file_writer.add_run_metadata(
|
||||
run_metadata, "sgd_train_{}".format(batch_index))
|
||||
|
||||
@@ -2,7 +2,7 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from datetime import datetime
|
||||
import os
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
@@ -36,11 +36,10 @@ DEFAULT_CONFIG = {
|
||||
"kl_target": 0.01,
|
||||
"timesteps_per_batch": 40000,
|
||||
"num_agents": 5,
|
||||
"tensorboard_log_dir": "/tmp/ray",
|
||||
"full_trace_nth_sgd_batch": -1,
|
||||
"full_trace_data_load": False,
|
||||
"use_tf_debugger": False,
|
||||
"model_checkpoint_file": "/tmp/iteration-%s.ckpt"}
|
||||
"model_checkpoint_file": "iteration-%s.ckpt"}
|
||||
|
||||
|
||||
class PolicyGradient(Algorithm):
|
||||
@@ -64,10 +63,11 @@ class PolicyGradient(Algorithm):
|
||||
self.j = 0
|
||||
self.kl_coeff = config["kl_coeff"]
|
||||
self.model = Agent(
|
||||
self.env_name, 1, self.preprocessor, self.config, False)
|
||||
self.env_name, 1, self.preprocessor, self.config, self.logdir, False)
|
||||
self.agents = [
|
||||
RemoteAgent.remote(
|
||||
self.env_name, 1, self.preprocessor, self.config, True)
|
||||
self.env_name, 1, self.preprocessor, self.config,
|
||||
self.logdir, True)
|
||||
for _ in range(config["num_agents"])]
|
||||
|
||||
def train(self):
|
||||
@@ -81,15 +81,12 @@ class PolicyGradient(Algorithm):
|
||||
if "load_checkpoint" in config:
|
||||
saver.restore(model.sess, config["load_checkpoint"])
|
||||
|
||||
file_writer = tf.summary.FileWriter(
|
||||
"{}/trpo_{}_{}".format(
|
||||
config["tensorboard_log_dir"], self.env_name,
|
||||
str(datetime.today()).replace(" ", "_")),
|
||||
model.sess.graph)
|
||||
file_writer = tf.summary.FileWriter(self.logdir, model.sess.graph)
|
||||
iter_start = time.time()
|
||||
if config["model_checkpoint_file"]:
|
||||
checkpoint_path = saver.save(
|
||||
model.sess, config["model_checkpoint_file"] % j)
|
||||
model.sess,
|
||||
os.path.join(self.logdir, config["model_checkpoint_file"] % j))
|
||||
print("Checkpoint saved in file: %s" % checkpoint_path)
|
||||
checkpointing_end = time.time()
|
||||
weights = ray.put(model.get_weights())
|
||||
|
||||
Reference in New Issue
Block a user