[rllib] switch to python logger (#3098)

* logg * set rllib logger * comment * info * rlib * comment * add format * fix lint * add file info * update * add ts * lint * better docs * fix value error * soft log level
2026-06-27 21:38:18 +08:00 · 2018-10-21 23:43:57 -07:00
parent 40c4148d4f
commit 221d1663c1
25 changed files with 160 additions and 320 deletions
@@ -2,6 +2,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import logging
+
 # Note: do not introduce unnecessary library dependencies here, e.g. gym.
 # This file is imported from the tune module in order to register RLlib agents.
 from ray.tune.registry import register_trainable
@@ -16,6 +18,17 @@ from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator
 from ray.rllib.evaluation.sample_batch import SampleBatch


+def _setup_logger():
+    logger = logging.getLogger("ray.rllib")
+    handler = logging.StreamHandler()  # no stream given: writes to sys.stderr
+    handler.setFormatter(
+        logging.Formatter(
+            "%(asctime)s\t%(levelname)s %(filename)s:%(lineno)s -- %(message)s"
+        ))
+    logger.addHandler(handler)
+    logger.propagate = False  # records are not passed on to ancestor handlers
+
+
 def _register_all():

    for key in [
@@ -27,6 +40,7 @@ def _register_all():
        register_trainable(key, get_agent_class(key))


+_setup_logger()
 _register_all()

 __all__ = [
@@ -10,6 +10,7 @@ from ray.rllib.optimizers import AsyncGradientsOptimizer
 from ray.rllib.utils import merge_dicts
 from ray.tune.trial import Resources

+# yapf: disable
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
    # Size of rollout batch
@@ -36,8 +37,8 @@ DEFAULT_CONFIG = with_common_config({
    # sample_batch_size by up to 5x due to async buffering of batches.
    "sample_async": True,
 })
-
 # __sphinx_doc_end__
+# yapf: enable


 class A3CAgent(Agent):
@@ -4,6 +4,7 @@ from __future__ import print_function

 import copy
 import os
+import logging
 import pickle
 import tempfile
 from datetime import datetime
@@ -19,12 +20,38 @@ from ray.tune.trainable import Trainable
 from ray.tune.logger import UnifiedLogger
 from ray.tune.result import DEFAULT_RESULTS_DIR

+# yapf: disable
 # __sphinx_doc_begin__
 COMMON_CONFIG = {
+    # === Debugging ===
+    # Whether to write episode stats and videos to the agent log dir
+    "monitor": False,
+    # Set the RLlib log level for the agent process and its remote evaluators
+    "log_level": "INFO",
+
+    # === Policy ===
+    # Arguments to pass to model. See models/catalog.py for a full list of the
+    # available model options.
+    "model": MODEL_DEFAULTS,
+    # Arguments to pass to the policy optimizer. These vary by optimizer.
+    "optimizer": {},
+
+    # === Environment ===
    # Discount factor of the MDP
    "gamma": 0.99,
    # Number of steps after which the episode is forced to terminate
    "horizon": None,
+    # Arguments to pass to the env creator
+    "env_config": {},
+    # Environment name can also be passed via config
+    "env": None,
+    # Whether to clip rewards prior to experience postprocessing. Setting to
+    # None means clip for Atari only.
+    "clip_rewards": None,
+    # Whether to use rllib or deepmind preprocessors by default
+    "preprocessor_pref": "deepmind",
+
+    # === Execution ===
    # Number of environments to evaluate vectorwise per worker.
    "num_envs_per_worker": 1,
    # Number of actors used for parallelism
@@ -42,20 +69,6 @@ COMMON_CONFIG = {
    "observation_filter": "NoFilter",
    # Whether to synchronize the statistics of remote filters.
    "synchronize_filters": True,
-    # Whether to clip rewards prior to experience postprocessing. Setting to
-    # None means clip for Atari only.
-    "clip_rewards": None,
-    # Whether to use rllib or deepmind preprocessors
-    "preprocessor_pref": "deepmind",
-    # Arguments to pass to the env creator
-    "env_config": {},
-    # Environment name can also be passed via config
-    "env": None,
-    # Arguments to pass to model. See models/catalog.py for a full list of the
-    # available model options.
-    "model": MODEL_DEFAULTS,
-    # Arguments to pass to the policy optimizer. These vary by optimizer.
-    "optimizer": {},
    # Configure TF for single-process operation by default
    "tf_session_args": {
        # note: parallelism_threads is set to auto for the local evaluator
@@ -72,8 +85,6 @@ COMMON_CONFIG = {
    },
    # Whether to LZ4 compress observations
    "compress_observations": False,
-    # Whether to write episode stats and videos to the agent log dir
-    "monitor": False,
    # Allocate a fraction of a GPU instead of one (e.g., 0.3 GPUs)
    "gpu_fraction": 1,

@@ -88,8 +99,8 @@ COMMON_CONFIG = {
        "policies_to_train": None,
    },
 }
-
 # __sphinx_doc_end__
+# yapf: enable


 def with_common_config(extra_config):
@@ -170,7 +181,8 @@ class Agent(Trainable):
            model_config=config["model"],
            policy_config=config,
            worker_index=worker_index,
-            monitor_path=self.logdir if config["monitor"] else None)
+            monitor_path=self.logdir if config["monitor"] else None,
+            log_level=config["log_level"])

    @classmethod
    def resource_help(cls, config):
@@ -197,13 +209,12 @@ class Agent(Trainable):

        # Agents allow env ids to be passed directly to the constructor.
        self._env_id = env or config.get("env")
-        if not self._env_id:
-            raise ValueError("Must specify env (str) when creating agent")

        # Create a default logger creator if no logger_creator is specified
        if logger_creator is None:
            timestr = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
-            logdir_prefix = '_'.join([self._agent_name, self._env_id, timestr])
+            logdir_prefix = "{}_{}_{}".format(
+                self._agent_name, self._env_id, timestr)

            def default_logger_creator(config):
                """Creates a Unified logger with a default logdir prefix
@@ -256,6 +267,8 @@ class Agent(Trainable):
                                    self._allow_unknown_configs,
                                    self._allow_unknown_subkeys)
        self.config = merged_config
+        if self.config.get("log_level"):
+            logging.getLogger("ray.rllib").setLevel(self.config["log_level"])

        # TODO(ekl) setting the graph is unnecessary for PyTorch agents
        with tf.Graph().as_default():
@@ -7,6 +7,7 @@ from __future__ import division
 from __future__ import print_function

 from collections import namedtuple
+import logging
 import numpy as np
 import time

@@ -16,14 +17,16 @@ from ray.tune.trial import Resources

 from ray.rllib.agents.ars import optimizers
 from ray.rllib.agents.ars import policies
-from ray.rllib.agents.es import tabular_logger as tlogger
 from ray.rllib.agents.ars import utils

+logger = logging.getLogger(__name__)
+
 Result = namedtuple("Result", [
    "noise_indices", "noisy_returns", "sign_noisy_returns", "noisy_lengths",
    "eval_returns", "eval_lengths"
 ])

+# yapf: disable
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
    "noise_stdev": 0.02,  # std deviation of parameter noise
@@ -38,6 +41,7 @@ DEFAULT_CONFIG = with_common_config({
    "offset": 0,
 })
 # __sphinx_doc_end__
+# yapf: enable


@ray.remote
@@ -163,12 +167,12 @@ class ARSAgent(Agent):
        self.report_length = self.config["report_length"]

        # Create the shared noise table.
-        print("Creating shared noise table.")
+        logger.info("Creating shared noise table.")
        noise_id = create_shared_noise.remote(self.config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
-        print("Creating actors.")
+        logger.info("Creating actors.")
        self.workers = [
            Worker.remote(self.config, self.env_creator, noise_id)
            for _ in range(self.config["num_workers"])
@@ -182,8 +186,9 @@ class ARSAgent(Agent):
        num_episodes, num_timesteps = 0, 0
        results = []
        while num_episodes < min_episodes:
-            print("Collected {} episodes {} timesteps so far this iter".format(
-                num_episodes, num_timesteps))
+            logger.info(
+                "Collected {} episodes {} timesteps so far this iter".format(
+                    num_episodes, num_timesteps))
            rollout_ids = [
                worker.do_rollouts.remote(theta_id) for worker in self.workers
            ]
@@ -263,7 +268,6 @@ class ARSAgent(Agent):
            g /= np.std(noisy_returns)
        assert (g.shape == (self.policy.num_params, )
                and g.dtype == np.float32)
-        print('the number of policy params is, ', self.policy.num_params)
        # Compute the new weights theta.
        theta, update_ratio = self.optimizer.update(-g)
        # Set the new weights in the local copy of the policy.
@@ -272,18 +276,9 @@ class ARSAgent(Agent):
        if len(all_eval_returns) > 0:
            self.reward_list.append(eval_returns.mean())

-        tlogger.record_tabular("NoisyEpRewMean", noisy_returns.mean())
-        tlogger.record_tabular("NoisyEpRewStd", noisy_returns.std())
-        tlogger.record_tabular("NoisyEpLenMean", noisy_lengths.mean())
-
-        tlogger.record_tabular("WeightsNorm", float(np.square(theta).sum()))
-        tlogger.record_tabular("WeightsStd", float(np.std(theta)))
-        tlogger.record_tabular("Grad2Norm", float(np.sqrt(np.square(g).sum())))
-        tlogger.record_tabular("UpdateRatio", float(update_ratio))
-        tlogger.dump_tabular()
-
        info = {
            "weights_norm": np.square(theta).sum(),
+            "weights_std": np.std(theta),
            "grad_norm": np.square(g).sum(),
            "update_ratio": update_ratio,
            "episodes_this_iter": noisy_lengths.size,
@@ -13,6 +13,7 @@ OPTIMIZER_SHARED_CONFIGS = [
    "train_batch_size", "learning_starts"
 ]

+# yapf: disable
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
    # === Model ===
@@ -108,8 +109,8 @@ DEFAULT_CONFIG = with_common_config({
    # Prevent iterations from going lower than this time span
    "min_iter_time_s": 1,
 })
-
 # __sphinx_doc_end__
+# yapf: enable


 class DDPGAgent(DQNAgent):
@@ -6,6 +6,7 @@ from ray.rllib.agents.dqn.dqn import DQNAgent, DEFAULT_CONFIG as DQN_CONFIG
 from ray.rllib.utils import merge_dicts
 from ray.tune.trial import Resources

+# yapf: disable
 # __sphinx_doc_begin__
 APEX_DEFAULT_CONFIG = merge_dicts(
    DQN_CONFIG,  # see also the options in dqn.py, which are also supported
@@ -31,8 +32,8 @@ APEX_DEFAULT_CONFIG = merge_dicts(
        "min_iter_time_s": 30,
    },
 )
-
 # __sphinx_doc_end__
+# yapf: enable


 class ApexAgent(DQNAgent):
@@ -20,6 +20,7 @@ OPTIMIZER_SHARED_CONFIGS = [
    "learning_starts"
 ]

+# yapf: disable
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
    # === Model ===
@@ -116,8 +117,8 @@ DEFAULT_CONFIG = with_common_config({
    # Prevent iterations from going lower than this time span
    "min_iter_time_s": 1,
 })
-
 # __sphinx_doc_end__
+# yapf: enable


 class DQNAgent(Agent):
@@ -6,6 +6,7 @@ from __future__ import division
 from __future__ import print_function

 from collections import namedtuple
+import logging
 import numpy as np
 import time

@@ -15,15 +16,17 @@ from ray.tune.trial import Resources

 from ray.rllib.agents.es import optimizers
 from ray.rllib.agents.es import policies
-from ray.rllib.agents.es import tabular_logger as tlogger
 from ray.rllib.agents.es import utils
 from ray.rllib.utils import merge_dicts

+logger = logging.getLogger(__name__)
+
 Result = namedtuple("Result", [
    "noise_indices", "noisy_returns", "sign_noisy_returns", "noisy_lengths",
    "eval_returns", "eval_lengths"
 ])

+# yapf: disable
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
    "l2_coeff": 0.005,
@@ -39,6 +42,7 @@ DEFAULT_CONFIG = with_common_config({
    "report_length": 10,
 })
 # __sphinx_doc_end__
+# yapf: enable


@ray.remote
@@ -169,12 +173,12 @@ class ESAgent(Agent):
        self.report_length = self.config["report_length"]

        # Create the shared noise table.
-        print("Creating shared noise table.")
+        logger.info("Creating shared noise table.")
        noise_id = create_shared_noise.remote(self.config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
-        print("Creating actors.")
+        logger.info("Creating actors.")
        self.workers = [
            Worker.remote(self.config, policy_params, self.env_creator,
                          noise_id) for _ in range(self.config["num_workers"])
@@ -188,8 +192,9 @@ class ESAgent(Agent):
        num_episodes, num_timesteps = 0, 0
        results = []
        while num_episodes < min_episodes or num_timesteps < min_timesteps:
-            print("Collected {} episodes {} timesteps so far this iter".format(
-                num_episodes, num_timesteps))
+            logger.info(
+                "Collected {} episodes {} timesteps so far this iter".format(
+                    num_episodes, num_timesteps))
            rollout_ids = [
                worker.do_rollouts.remote(theta_id) for worker in self.workers
            ]
@@ -269,21 +274,6 @@ class ESAgent(Agent):
        if len(all_eval_returns) > 0:
            self.reward_list.append(np.mean(eval_returns))

-        tlogger.record_tabular("EvalEpRewStd", eval_returns.std())
-        tlogger.record_tabular("EvalEpLenMean", eval_lengths.mean())
-
-        tlogger.record_tabular("EpRewMean", noisy_returns.mean())
-        tlogger.record_tabular("EpRewStd", noisy_returns.std())
-        tlogger.record_tabular("EpLenMean", noisy_lengths.mean())
-
-        tlogger.record_tabular("Norm", float(np.square(theta).sum()))
-        tlogger.record_tabular("GradNorm", float(np.square(g).sum()))
-        tlogger.record_tabular("UpdateRatio", float(update_ratio))
-
-        tlogger.record_tabular("EpisodesThisIter", noisy_lengths.size)
-        tlogger.record_tabular("EpisodesSoFar", self.episodes_so_far)
-        tlogger.dump_tabular()
-
        info = {
            "weights_norm": np.square(theta).sum(),
            "grad_norm": np.square(g).sum(),
@@ -1,229 +0,0 @@
-# Code in this file is copied and adapted from
-# https://github.com/openai/evolution-strategies-starter.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from collections import OrderedDict
-import os
-import sys
-import time
-
-import tensorflow as tf
-from tensorflow.core.util import event_pb2
-from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.util import compat
-
-DEBUG = 10
-INFO = 20
-WARN = 30
-ERROR = 40
-
-DISABLED = 50
-
-
-class TbWriter(object):
-    """Based on SummaryWriter, but changed to allow for a different prefix."""
-
-    def __init__(self, dir, prefix):
-        self.dir = dir
-        # Start at 1, because EvWriter automatically generates an object with
-        # step = 0.
-        self.step = 1
-        self.evwriter = pywrap_tensorflow.EventsWriter(
-            compat.as_bytes(os.path.join(dir, prefix)))
-
-    def write_values(self, key2val):
-        summary = tf.Summary(value=[
-            tf.Summary.Value(tag=k, simple_value=float(v))
-            for (k, v) in key2val.items()
-        ])
-        event = event_pb2.Event(wall_time=time.time(), summary=summary)
-        event.step = self.step
-        self.evwriter.WriteEvent(event)
-        self.evwriter.Flush()
-        self.step += 1
-
-    def close(self):
-        self.evwriter.Close()
-
-
-# API
-
-
-def start(dir):
-    if _Logger.CURRENT is not _Logger.DEFAULT:
-        sys.stderr.write("WARNING: You asked to start logging (dir=%s), but "
-                         "you never stopped the previous logger (dir=%s)."
-                         "\n" % (dir, _Logger.CURRENT.dir))
-    _Logger.CURRENT = _Logger(dir=dir)
-
-
-def stop():
-    if _Logger.CURRENT is _Logger.DEFAULT:
-        sys.stderr.write("WARNING: You asked to stop logging, but you never "
-                         "started any previous logger."
-                         "\n" % (dir, _Logger.CURRENT.dir))
-        return
-    _Logger.CURRENT.close()
-    _Logger.CURRENT = _Logger.DEFAULT
-
-
-def record_tabular(key, val):
-    """Log a value of some diagnostic.
-
-    Call this once for each diagnostic quantity, each iteration.
-    """
-    _Logger.CURRENT.record_tabular(key, val)
-
-
-def dump_tabular():
-    """Write all of the diagnostics from the current iteration."""
-    _Logger.CURRENT.dump_tabular()
-
-
-def log(*args, **kwargs):
-    """Write the sequence of args, with no separators.
-
-    This is written to the console and output files (if you've configured an
-    output file).
-    """
-    level = kwargs['level'] if 'level' in kwargs else INFO
-    _Logger.CURRENT.log(*args, level=level)
-
-
-def debug(*args):
-    log(*args, level=DEBUG)
-
-
-def info(*args):
-    log(*args, level=INFO)
-
-
-def warn(*args):
-    log(*args, level=WARN)
-
-
-def error(*args):
-    log(*args, level=ERROR)
-
-
-def set_level(level):
-    """
-    Set logging threshold on current logger.
-    """
-    _Logger.CURRENT.set_level(level)
-
-
-def get_dir():
-    """
-    Get directory that log files are being written to.
-    will be None if there is no output directory (i.e., if you didn't call
-    start)
-    """
-    return _Logger.CURRENT.get_dir()
-
-
-def get_expt_dir():
-    sys.stderr.write("get_expt_dir() is Deprecated. Switch to get_dir()\n")
-    return get_dir()
-
-
-# Backend
-
-
-class _Logger(object):
-    # A logger with no output files. (See right below class definition) so that
-    # you can still log to the terminal without setting up any output files.
-    DEFAULT = None
-    # Current logger being used by the free functions above.
-    CURRENT = None
-
-    def __init__(self, dir=None):
-        self.name2val = OrderedDict()  # Values this iteration.
-        self.level = INFO
-        self.dir = dir
-        self.text_outputs = [sys.stdout]
-        if dir is not None:
-            os.makedirs(dir, exist_ok=True)
-            self.text_outputs.append(open(os.path.join(dir, "log.txt"), "w"))
-            self.tbwriter = TbWriter(dir=dir, prefix="events")
-        else:
-            self.tbwriter = None
-
-    # Logging API, forwarded
-
-    def record_tabular(self, key, val):
-        self.name2val[key] = val
-
-    def dump_tabular(self):
-        # Create strings for printing.
-        key2str = OrderedDict()
-        for (key, val) in self.name2val.items():
-            if hasattr(val, "__float__"):
-                valstr = "%-8.3g" % val
-            else:
-                valstr = val
-            key2str[self._truncate(key)] = self._truncate(valstr)
-        keywidth = max(map(len, key2str.keys()))
-        valwidth = max(map(len, key2str.values()))
-        # Write to all text outputs
-        self._write_text("-" * (keywidth + valwidth + 7), "\n")
-        for (key, val) in key2str.items():
-            self._write_text("| ", key, " " * (keywidth - len(key)), " | ",
-                             val, " " * (valwidth - len(val)), " |\n")
-        self._write_text("-" * (keywidth + valwidth + 7), "\n")
-        for f in self.text_outputs:
-            try:
-                f.flush()
-            except OSError:
-                sys.stderr.write('Warning! OSError when flushing.\n')
-        # Write to tensorboard
-        if self.tbwriter is not None:
-            self.tbwriter.write_values(self.name2val)
-            self.name2val.clear()
-
-    def log(self, *args, **kwargs):
-        level = kwargs['level'] if 'level' in kwargs else INFO
-        if self.level <= level:
-            self._do_log(*args)
-
-    # Configuration
-
-    def set_level(self, level):
-        self.level = level
-
-    def get_dir(self):
-        return self.dir
-
-    def close(self):
-        for f in self.text_outputs[1:]:
-            f.close()
-        if self.tbwriter:
-            self.tbwriter.close()
-
-    # Misc
-
-    def _do_log(self, *args):
-        self._write_text(*args + ('\n', ))
-        for f in self.text_outputs:
-            try:
-                f.flush()
-            except OSError:
-                print('Warning! OSError when flushing.')
-
-    def _write_text(self, *strings):
-        for f in self.text_outputs:
-            for string in strings:
-                f.write(string)
-
-    def _truncate(self, s):
-        if len(s) > 33:
-            return s[:30] + "..."
-        else:
-            return s
-
-
-_Logger.DEFAULT = _Logger()
-_Logger.CURRENT = _Logger.DEFAULT
@@ -23,6 +23,7 @@ OPTIMIZER_SHARED_CONFIGS = [
    "max_sample_requests_in_flight_per_worker",
 ]

+# yapf: disable
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
    # V-trace params (see vtrace.py).
@@ -65,8 +66,8 @@ DEFAULT_CONFIG = with_common_config({
    "vf_loss_coeff": 0.5,
    "entropy_coeff": -0.01,
 })
-
 # __sphinx_doc_end__
+# yapf: enable


 class ImpalaAgent(Agent):
@@ -8,6 +8,7 @@ from ray.rllib.optimizers import SyncSamplesOptimizer
 from ray.rllib.utils import merge_dicts
 from ray.tune.trial import Resources

+# yapf: disable
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
    # No remote workers by default
@@ -15,8 +16,8 @@ DEFAULT_CONFIG = with_common_config({
    # Learning rate
    "lr": 0.0004,
 })
-
 # __sphinx_doc_end__
+# yapf: enable


 class PGAgent(Agent):
@@ -2,12 +2,17 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import logging
+
 from ray.rllib.agents import Agent, with_common_config
 from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
 from ray.rllib.utils import merge_dicts
 from ray.rllib.optimizers import SyncSamplesOptimizer, LocalMultiGPUOptimizer
 from ray.tune.trial import Resources

+logger = logging.getLogger(__name__)
+
+# yapf: disable
 # __sphinx_doc_begin__
 DEFAULT_CONFIG = with_common_config({
    # If true, use the Generalized Advantage Estimator (GAE)
@@ -55,8 +60,8 @@ DEFAULT_CONFIG = with_common_config({
    # Use the sync samples optimizer instead of the multi-gpu one
    "simple_optimizer": False,
 })
-
 # __sphinx_doc_end__
+# yapf: enable


 class PPOAgent(Agent):
@@ -111,7 +116,7 @@ class PPOAgent(Agent):
            if waste_ratio > 1.5:
                raise ValueError(msg)
            else:
-                print("Warning: " + msg)
+                logger.warning(msg)
        if self.config["sgd_minibatch_size"] > self.config["train_batch_size"]:
            raise ValueError(
                "Minibatch size {} must be <= train batch size {}.".format(
@@ -3,6 +3,7 @@ from __future__ import division
 from __future__ import print_function

 import gym
+import logging
 import pickle
 import tensorflow as tf

@@ -99,7 +100,8 @@ class PolicyEvaluator(EvaluatorInterface):
                 model_config=None,
                 policy_config=None,
                 worker_index=0,
-                 monitor_path=None):
+                 monitor_path=None,
+                 log_level=None):
        """Initialize a policy evaluator.

        Arguments:
@@ -158,8 +160,12 @@ class PolicyEvaluator(EvaluatorInterface):
                through EnvContext so that envs can be configured per worker.
            monitor_path (str): Write out episode stats and videos to this
                directory if specified.
+            log_level (str): Set the "ray.rllib" log level on creation.
        """

+        if log_level:
+            logging.getLogger("ray.rllib").setLevel(log_level)
+
        env_context = EnvContext(env_config or {}, worker_index)
        policy_config = policy_config or {}
        self.policy_config = policy_config
@@ -3,6 +3,7 @@ from __future__ import division
 from __future__ import print_function

 from collections import defaultdict, namedtuple
+import logging
 import numpy as np
 import six.moves.queue as queue
 import threading
@@ -16,6 +17,8 @@ from ray.rllib.env.atari_wrappers import get_wrapper_by_cls, MonitorEnv
 from ray.rllib.models.action_dist import TupleActions
 from ray.rllib.utils.tf_run_builder import TFRunBuilder

+logger = logging.getLogger(__name__)
+
 RolloutMetrics = namedtuple(
    "RolloutMetrics", ["episode_length", "episode_reward", "agent_rewards"])

@@ -221,7 +224,7 @@ def _env_runner(async_vector_env,
            horizon = (
                async_vector_env.get_unwrapped()[0].spec.max_episode_steps)
    except Exception:
-        print("*** WARNING ***: no episode horizon specified, assuming inf")
+        logger.warning("no episode horizon specified, assuming inf")
    if not horizon:
        horizon = float("inf")

@@ -3,6 +3,7 @@ from __future__ import division
 from __future__ import print_function

 import gym
+import logging
 import numpy as np
 import tensorflow as tf
 from functools import partial
@@ -21,6 +22,9 @@ from ray.rllib.models.fcnet import FullyConnectedNetwork
 from ray.rllib.models.visionnet import VisionNetwork
 from ray.rllib.models.lstm import LSTM

+logger = logging.getLogger(__name__)
+
+# yapf: disable
 # __sphinx_doc_begin__
 MODEL_DEFAULTS = {
    # === Built-in options ===
@@ -67,8 +71,8 @@ MODEL_DEFAULTS = {
    # Extra options to pass to the custom classes
    "custom_options": {},
 }
-
 # __sphinx_doc_end__
+# yapf: enable


 class ModelCatalog(object):
@@ -200,7 +204,7 @@ class ModelCatalog(object):
                   seq_lens):
        if options.get("custom_model"):
            model = options["custom_model"]
-            print("Using custom model {}".format(model))
+            logger.info("Using custom model {}".format(model))
            return _global_registry.get(RLLIB_MODEL, model)(
                input_dict,
                obs_space,
@@ -238,7 +242,7 @@ class ModelCatalog(object):
        options = options or MODEL_DEFAULTS
        if options.get("custom_model"):
            model = options["custom_model"]
-            print("Using custom torch model {}".format(model))
+            logger.info("Using custom torch model {}".format(model))
            return _global_registry.get(RLLIB_MODEL, model)(
                input_shape, num_outputs, options)

@@ -271,7 +275,7 @@ class ModelCatalog(object):

        if options.get("custom_preprocessor"):
            preprocessor = options["custom_preprocessor"]
-            print("Using custom preprocessor {}".format(preprocessor))
+            logger.info("Using custom preprocessor {}".format(preprocessor))
            return _global_registry.get(RLLIB_PREPROCESSOR, preprocessor)(
                env.observation_space, options)

@@ -1,13 +1,17 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+
 import cv2
+import logging
 import numpy as np
 import gym

 ATARI_OBS_SHAPE = (210, 160, 3)
 ATARI_RAM_OBS_SHAPE = (128, )

+logger = logging.getLogger(__name__)
+

 class Preprocessor(object):
    """Defines an abstract observation preprocessor function.
@@ -128,7 +132,7 @@ class TupleFlatteningPreprocessor(Preprocessor):
        self.preprocessors = []
        for i in range(len(self._obs_space.spaces)):
            space = self._obs_space.spaces[i]
-            print("Creating sub-preprocessor for", space)
+            logger.info("Creating sub-preprocessor for {}".format(space))
            preprocessor = get_preprocessor(space)(space, self._options)
            self.preprocessors.append(preprocessor)
            size += preprocessor.size
@@ -153,7 +157,7 @@ class DictFlatteningPreprocessor(Preprocessor):
        size = 0
        self.preprocessors = []
        for space in self._obs_space.spaces.values():
-            print("Creating sub-preprocessor for", space)
+            logger.info("Creating sub-preprocessor for {}".format(space))
            preprocessor = get_preprocessor(space)(space, self._options)
            self.preprocessors.append(preprocessor)
            size += preprocessor.size
@@ -2,10 +2,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import logging
+
 from ray.rllib.models.pytorch.model import Model, SlimFC
 from ray.rllib.models.pytorch.misc import normc_initializer
 import torch.nn as nn

+logger = logging.getLogger(__name__)
+

 class FullyConnectedNetwork(Model):
    """TODO(rliaw): Logits, Value should both be contained here"""
@@ -19,7 +23,7 @@ class FullyConnectedNetwork(Model):
            activation = nn.Tanh
        elif fcnet_activation == "relu":
            activation = nn.ReLU
-        print("Constructing fcnet {} {}".format(hiddens, activation))
+        logger.info("Constructing fcnet {} {}".format(hiddens, activation))

        layers = []
        last_layer_size = inputs
@@ -6,6 +6,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import logging
 import numpy as np
 import random
 import time
@@ -20,6 +21,8 @@ from ray.rllib.utils.actors import TaskPool
 from ray.rllib.utils.timer import TimerStat
 from ray.rllib.utils.window_stat import WindowStat

+logger = logging.getLogger(__name__)
+
 LEARNER_QUEUE_MAX_SIZE = 16
 NUM_DATA_LOAD_THREADS = 16

@@ -84,7 +87,7 @@ class TFMultiGPULearner(LearnerThread):
            self.devices = ["/cpu:0"]
        else:
            self.devices = ["/gpu:{}".format(i) for i in range(num_gpus)]
-            print("TFMultiGPULearner devices", self.devices)
+            logger.info("TFMultiGPULearner devices {}".format(self.devices))
        assert self.train_batch_size % len(self.devices) == 0
        assert self.train_batch_size >= len(self.devices), "batch too small"
        self.policy = self.local_evaluator.policy_map["default"]
@@ -199,7 +202,7 @@ class AsyncSamplesOptimizer(PolicyOptimizer):
        self.sample_batch_size = sample_batch_size

        if num_gpus > 1 or num_parallel_data_loaders > 1:
-            print(
+            logger.info(
                "Enabling multi-GPU mode, {} GPUs, {} parallel loaders".format(
                    num_gpus, num_parallel_data_loaders))
            if train_batch_size // max(1, num_gpus) % (
@@ -2,6 +2,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import logging
 import numpy as np
 from collections import defaultdict
 import tensorflow as tf
@@ -12,6 +13,8 @@ from ray.rllib.optimizers.policy_optimizer import PolicyOptimizer
 from ray.rllib.optimizers.multi_gpu_impl import LocalSyncParallelOptimizer
 from ray.rllib.utils.timer import TimerStat

+logger = logging.getLogger(__name__)
+

 class LocalMultiGPUOptimizer(PolicyOptimizer):
    """A synchronous optimizer that uses multiple local GPUs.
@@ -53,7 +56,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
        self.update_weights_timer = TimerStat()
        self.standardize_fields = standardize_fields

-        print("LocalMultiGPUOptimizer devices", self.devices)
+        logger.info("LocalMultiGPUOptimizer devices {}".format(self.devices))

        if set(self.local_evaluator.policy_map.keys()) != {"default"}:
            raise ValueError(
@@ -126,7 +129,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
        with self.grad_timer:
            num_batches = (
                int(tuples_per_device) // int(self.per_device_batch_size))
-            print("== sgd epochs ==")
+            logger.debug("== sgd epochs ==")
            for i in range(self.num_sgd_iter):
                iter_extra_fetches = defaultdict(list)
                permutation = np.random.permutation(num_batches)
@@ -136,7 +139,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
                        permutation[batch_index] * self.per_device_batch_size)
                    for k, v in batch_fetches.items():
                        iter_extra_fetches[k].append(v)
-                print(i, _averaged(iter_extra_fetches))
+                logger.debug("{} {}".format(i, _averaged(iter_extra_fetches)))

        self.num_steps_sampled += samples.count
        self.num_steps_trained += samples.count
@@ -2,11 +2,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import logging
+
 import ray
 from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator
 from ray.rllib.evaluation.metrics import collect_episodes, summarize_episodes
 from ray.rllib.evaluation.sample_batch import MultiAgentBatch

+logger = logging.getLogger(__name__)
+

 class PolicyOptimizer(object):
    """Policy optimizers encapsulate distributed RL optimization strategies.
@@ -3,11 +3,14 @@ from __future__ import division
 from __future__ import print_function

 import ray
+import logging
 from ray.rllib.optimizers.policy_optimizer import PolicyOptimizer
 from ray.rllib.evaluation.sample_batch import SampleBatch
 from ray.rllib.utils.filter import RunningStat
 from ray.rllib.utils.timer import TimerStat

+logger = logging.getLogger(__name__)
+

 class SyncSamplesOptimizer(PolicyOptimizer):
    """A simple synchronous RL optimizer.
@@ -52,7 +55,7 @@ class SyncSamplesOptimizer(PolicyOptimizer):
                if "stats" in fetches:
                    self.learner_stats = fetches["stats"]
                if self.num_sgd_iter > 1:
-                    print(i, fetches)
+                    logger.debug("{} {}".format(i, fetches))
            self.grad_timer.push_units_processed(samples.count)

        self.num_steps_sampled += samples.count
@@ -2,9 +2,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import logging
 import os
 import ray

+logger = logging.getLogger(__name__)
+

 class TaskPool(object):
    """Helper class for tracking the status of many in-flight actor tasks."""
@@ -80,11 +83,12 @@ def split_colocated(actors):
 def try_create_colocated(cls, args, count):
    actors = [cls.remote(*args) for _ in range(count)]
    local, _ = split_colocated(actors)
-    print("Got {} colocated actors of {}".format(len(local), count))
+    logger.info("Got {} colocated actors of {}".format(len(local), count))
    return local


 def create_colocated(cls, args, count):
+    logger.info("Trying to create {} colocated actors".format(count))
    ok = []
    i = 1
    while len(ok) < count and i < 10:
@@ -2,18 +2,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import logging
 import time
 import base64
 import numpy as np
 import pyarrow

+logger = logging.getLogger(__name__)
+
 try:
    import lz4.frame
    LZ4_ENABLED = True
 except ImportError:
-    print("WARNING: lz4 not available, disabling sample compression. "
-          "This will significantly impact RLlib performance. "
-          "To install lz4, run `pip install lz4`.")
+    logger.warning("lz4 not available, disabling sample compression. "
+                   "This will significantly impact RLlib performance. "
+                   "To install lz4, run `pip install lz4`.")
    LZ4_ENABLED = False


@@ -2,14 +2,17 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import logging
 import pickle

+logger = logging.getLogger(__name__)
+
 try:
    import requests  # `requests` is not part of stdlib.
 except ImportError:
    requests = None
-    print("Couldn't import `requests` library. Be sure to install it on"
-          " the client side.")
+    logger.warning("Couldn't import `requests` library. Be sure to install it"
+                   " on the client side.")


 class PolicyClient(object):
@@ -109,8 +112,7 @@ class PolicyClient(object):
        payload = pickle.dumps(data)
        response = requests.post(self._address, data=payload)
        if response.status_code != 200:
-            print("Request failed", data)
-            print(response.text)
+            logger.error("Request failed {}: {}".format(response.text, data))
        response.raise_for_status()
        parsed = pickle.loads(response.content)
        return parsed
@@ -2,12 +2,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import logging
 import os
 import time

 import tensorflow as tf
 from tensorflow.python.client import timeline

+logger = logging.getLogger(__name__)
+

 class TFRunBuilder(object):
    """Used to incrementally build up a TensorFlow run.
@@ -43,7 +46,7 @@ class TFRunBuilder(object):
                    self.session, self.fetches, self.debug_name,
                    self.feed_dict, os.environ.get("TF_TIMELINE_DIR"))
            except Exception as e:
-                print("Error fetching: {}, feed_dict={}".format(
+                logger.error("Error fetching: {}, feed_dict={}".format(
                    self.fetches, self.feed_dict))
                raise e
        if isinstance(to_fetch, int):
@@ -76,8 +79,8 @@ def run_timeline(sess, ops, debug_name, feed_dict={}, timeline_dir=None):
                debug_name, os.getpid(), _count))
        _count += 1
        trace_file = open(outf, "w")
-        print("Wrote tf timeline ({} s) to {}".format(time.time() - start,
-                                                      os.path.abspath(outf)))
+        logger.info("Wrote tf timeline ({} s) to {}".format(
+            time.time() - start, os.path.abspath(outf)))
        trace_file.write(trace.generate_chrome_trace_format())
    else:
        fetches = sess.run(ops, feed_dict=feed_dict)