[rllib] Standardize writing output logs and other files to /tmp/ray (#706)

* rllib v0
* fix imports
* lint
* comments
* update docs
* a3c wip
* a3c wip
* report stats
* update doc
* add common logdir attr
* name is too long
* fix small bug
* propagate exception on error
* fetch metrics
* fix small nits
2026-06-30 08:46:01 +08:00 · 2017-07-03 09:01:47 -07:00
parent 2b11a7bca2
commit 66734847bb
6 changed files with 43 additions and 20 deletions
@@ -26,7 +26,7 @@ class Runner(object):

  The gradient computation is also executed from this object.
  """
-  def __init__(self, env_name, actor_id, logdir="/tmp/ray/a3c/", start=True):
+  def __init__(self, env_name, actor_id, logdir, start=True):
    env = create_env(env_name)
    self.id = actor_id
    num_actions = env.action_space.n
@@ -89,7 +89,8 @@ class A3C(Algorithm):
    self.policy = LSTMPolicy(
        self.env.observation_space.shape, self.env.action_space.n, 0)
    self.agents = [
-        Runner.remote(env_name, i) for i in range(config["num_workers"])]
+        Runner.remote(env_name, i, self.logdir)
+        for i in range(config["num_workers"])]
    self.parameters = self.policy.get_weights()
    self.iteration = 0

@@ -1,4 +1,12 @@
 from collections import namedtuple
+from datetime import datetime
+import json
+import logging
+import os
+import tempfile
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)


 TrainingResult = namedtuple("TrainingResult", [
@@ -14,16 +22,32 @@ class Algorithm(object):
  Algorithm objects retain internal model state between calls to train(), so
  you should create a new algorithm instance for each training session.

+  Attributes:
+    env_name (str): Name of the OpenAI gym environment to train against.
+    config (obj): Algorithm-specific configuration data.
+    logdir (str): Directory in which training outputs should be placed.
+
  TODO(ekl): support checkpoint / restore of training state.
  """

  def __init__(self, env_name, config):
    self.env_name = env_name
    self.config = config
+    self.logdir = tempfile.mkdtemp(
+        prefix="{}_{}_{}".format(
+            env_name,
+            self.__class__.__name__,
+            datetime.today().strftime("%Y-%m-%d_%H-%M-%S")),
+        dir="/tmp/ray")
+    json.dump(
+        self.config, open(os.path.join(self.logdir, "config.json"), "w"),
+        sort_keys=True, indent=4)
+    logger.info(
+        "%s algorithm created with logdir '%s'",
+        self.__class__.__name__, self.logdir)

  def train(self):
-    """
-    Runs one logical iteration of training.
+    """Runs one logical iteration of training.

    Returns:
      A TrainingResult that describes training progress.
@@ -277,7 +277,7 @@ class EvolutionStrategies(Algorithm):
    if (config.snapshot_freq != 0 and
            self.iteration % config.snapshot_freq == 0):
      filename = os.path.join(
-          "/tmp", "snapshot_iter{:05d}.h5".format(self.iteration))
+          self.logdir, "snapshot_iter{:05d}.h5".format(self.iteration))
      assert not os.path.exists(filename)
      self.policy.save(filename)
      tlogger.log("Saved snapshot {}".format(filename))
@@ -48,7 +48,7 @@ class Agent(object):
  this GPU-local data.
  """

-  def __init__(self, name, batchsize, preprocessor, config, is_remote):
+  def __init__(self, name, batchsize, preprocessor, config, logdir, is_remote):
    if is_remote:
      os.environ["CUDA_VISIBLE_DEVICES"] = ""
      devices = ["/cpu:0"]
@@ -56,6 +56,7 @@ class Agent(object):
      devices = config["devices"]
    self.devices = devices
    self.config = config
+    self.logdir = logdir
    self.env = BatchedEnv(name, batchsize, preprocessor=preprocessor)
    if preprocessor.shape is None:
      preprocessor.shape = self.env.observation_space.shape
@@ -220,7 +221,7 @@ class Agent(object):
        run_metadata=run_metadata)
    if full_trace:
      trace = timeline.Timeline(step_stats=run_metadata.step_stats)
-      trace_file = open("/tmp/ray/timeline-load.json", "w")
+      trace_file = open(os.path.join(self.logdir, "timeline-load.json"), "w")
      trace_file.write(trace.generate_chrome_trace_format())

    tuples_per_device = len(truncated_obs) / len(self.devices)
@@ -254,7 +255,7 @@ class Agent(object):

    if full_trace:
      trace = timeline.Timeline(step_stats=run_metadata.step_stats)
-      trace_file = open("/tmp/ray/timeline-sgd.json", "w")
+      trace_file = open(os.path.join(self.logdir, "timeline-sgd.json"), "w")
      trace_file.write(trace.generate_chrome_trace_format())
      file_writer.add_run_metadata(
          run_metadata, "sgd_train_{}".format(batch_index))
@@ -2,7 +2,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-from datetime import datetime
+import os
 import time

 import numpy as np
@@ -36,11 +36,10 @@ DEFAULT_CONFIG = {
    "kl_target": 0.01,
    "timesteps_per_batch": 40000,
    "num_agents": 5,
-    "tensorboard_log_dir": "/tmp/ray",
    "full_trace_nth_sgd_batch": -1,
    "full_trace_data_load": False,
    "use_tf_debugger": False,
-    "model_checkpoint_file": "/tmp/iteration-%s.ckpt"}
+    "model_checkpoint_file": "iteration-%s.ckpt"}


 class PolicyGradient(Algorithm):
@@ -64,10 +63,11 @@ class PolicyGradient(Algorithm):
    self.j = 0
    self.kl_coeff = config["kl_coeff"]
    self.model = Agent(
-        self.env_name, 1, self.preprocessor, self.config, False)
+        self.env_name, 1, self.preprocessor, self.config, self.logdir, False)
    self.agents = [
        RemoteAgent.remote(
-            self.env_name, 1, self.preprocessor, self.config, True)
+            self.env_name, 1, self.preprocessor, self.config,
+            self.logdir, True)
        for _ in range(config["num_agents"])]

  def train(self):
@@ -81,15 +81,12 @@ class PolicyGradient(Algorithm):
    if "load_checkpoint" in config:
      saver.restore(model.sess, config["load_checkpoint"])

-    file_writer = tf.summary.FileWriter(
-        "{}/trpo_{}_{}".format(
-            config["tensorboard_log_dir"], self.env_name,
-            str(datetime.today()).replace(" ", "_")),
-        model.sess.graph)
+    file_writer = tf.summary.FileWriter(self.logdir, model.sess.graph)
    iter_start = time.time()
    if config["model_checkpoint_file"]:
      checkpoint_path = saver.save(
-          model.sess, config["model_checkpoint_file"] % j)
+          model.sess,
+          os.path.join(self.logdir, config["model_checkpoint_file"] % j))
      print("Checkpoint saved in file: %s" % checkpoint_path)
    checkpointing_end = time.time()
    weights = ray.put(model.get_weights())