[rllib] switch to python logger (#3098)

* logg

* set rllib logger

* comment

* info

* rlib

* comment

* add format

* fix lint

* add file info

* update

* add ts

* lint

* better docs

* fix value error

* soft log level
This commit is contained in:
Eric Liang
2018-10-21 23:43:57 -07:00
committed by GitHub
parent 40c4148d4f
commit 221d1663c1
25 changed files with 160 additions and 320 deletions
+14
View File
@@ -2,6 +2,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
# Note: do not introduce unnecessary library dependencies here, e.g. gym.
# This file is imported from the tune module in order to register RLlib agents.
from ray.tune.registry import register_trainable
@@ -16,6 +18,17 @@ from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator
from ray.rllib.evaluation.sample_batch import SampleBatch
def _setup_logger():
    """Install RLlib's default console handler on the "ray.rllib" logger.

    The handler writes records to the default ``logging.StreamHandler``
    stream, formatted with a timestamp, level, source file and line number.
    Propagation to ancestor loggers is turned off so records handled here
    are not passed on to handlers of parent loggers.

    The function is idempotent: calling it again (for example when the
    module is reloaded) does not attach a second handler, which would
    otherwise make every record appear twice.
    """
    logger = logging.getLogger("ray.rllib")
    # Only handlers installed by this function carry the marker attribute,
    # so handlers a user attached themselves never suppress ours.
    already_installed = any(
        getattr(existing, "_rllib_default_handler", False)
        for existing in logger.handlers)
    if not already_installed:
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter(
                "%(asctime)s\t%(levelname)s %(filename)s:%(lineno)s -- "
                "%(message)s"))
        handler._rllib_default_handler = True
        logger.addHandler(handler)
    logger.propagate = False
def _register_all():
for key in [
@@ -27,6 +40,7 @@ def _register_all():
register_trainable(key, get_agent_class(key))
_setup_logger()
_register_all()
__all__ = [
+2 -1
View File
@@ -10,6 +10,7 @@ from ray.rllib.optimizers import AsyncGradientsOptimizer
from ray.rllib.utils import merge_dicts
from ray.tune.trial import Resources
# yapf: disable
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# Size of rollout batch
@@ -36,8 +37,8 @@ DEFAULT_CONFIG = with_common_config({
# sample_batch_size by up to 5x due to async buffering of batches.
"sample_async": True,
})
# __sphinx_doc_end__
# yapf: enable
class A3CAgent(Agent):
+34 -21
View File
@@ -4,6 +4,7 @@ from __future__ import print_function
import copy
import os
import logging
import pickle
import tempfile
from datetime import datetime
@@ -19,12 +20,38 @@ from ray.tune.trainable import Trainable
from ray.tune.logger import UnifiedLogger
from ray.tune.result import DEFAULT_RESULTS_DIR
# yapf: disable
# __sphinx_doc_begin__
COMMON_CONFIG = {
# === Debugging ===
# Whether to write episode stats and videos to the agent log dir
"monitor": False,
# Set the RLlib log level for the agent process and its remote evaluators
"log_level": "INFO",
# === Policy ===
# Arguments to pass to model. See models/catalog.py for a full list of the
# available model options.
"model": MODEL_DEFAULTS,
# Arguments to pass to the policy optimizer. These vary by optimizer.
"optimizer": {},
# === Environment ===
# Discount factor of the MDP
"gamma": 0.99,
# Number of steps after which the episode is forced to terminate
"horizon": None,
# Arguments to pass to the env creator
"env_config": {},
# Environment name can also be passed via config
"env": None,
# Whether to clip rewards prior to experience postprocessing. Setting to
# None means clip for Atari only.
"clip_rewards": None,
# Whether to use rllib or deepmind preprocessors by default
"preprocessor_pref": "deepmind",
# === Execution ===
# Number of environments to evaluate vectorwise per worker.
"num_envs_per_worker": 1,
# Number of actors used for parallelism
@@ -42,20 +69,6 @@ COMMON_CONFIG = {
"observation_filter": "NoFilter",
# Whether to synchronize the statistics of remote filters.
"synchronize_filters": True,
# Whether to clip rewards prior to experience postprocessing. Setting to
# None means clip for Atari only.
"clip_rewards": None,
# Whether to use rllib or deepmind preprocessors
"preprocessor_pref": "deepmind",
# Arguments to pass to the env creator
"env_config": {},
# Environment name can also be passed via config
"env": None,
# Arguments to pass to model. See models/catalog.py for a full list of the
# available model options.
"model": MODEL_DEFAULTS,
# Arguments to pass to the policy optimizer. These vary by optimizer.
"optimizer": {},
# Configure TF for single-process operation by default
"tf_session_args": {
# note: parallelism_threads is set to auto for the local evaluator
@@ -72,8 +85,6 @@ COMMON_CONFIG = {
},
# Whether to LZ4 compress observations
"compress_observations": False,
# Whether to write episode stats and videos to the agent log dir
"monitor": False,
# Allocate a fraction of a GPU instead of one (e.g., 0.3 GPUs)
"gpu_fraction": 1,
@@ -88,8 +99,8 @@ COMMON_CONFIG = {
"policies_to_train": None,
},
}
# __sphinx_doc_end__
# yapf: enable
def with_common_config(extra_config):
@@ -170,7 +181,8 @@ class Agent(Trainable):
model_config=config["model"],
policy_config=config,
worker_index=worker_index,
monitor_path=self.logdir if config["monitor"] else None)
monitor_path=self.logdir if config["monitor"] else None,
log_level=config["log_level"])
@classmethod
def resource_help(cls, config):
@@ -197,13 +209,12 @@ class Agent(Trainable):
# Agents allow env ids to be passed directly to the constructor.
self._env_id = env or config.get("env")
if not self._env_id:
raise ValueError("Must specify env (str) when creating agent")
# Create a default logger creator if no logger_creator is specified
if logger_creator is None:
timestr = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
logdir_prefix = '_'.join([self._agent_name, self._env_id, timestr])
# Pass the three components as separate positional arguments: handing
# str.format() a single list leaves the 2nd and 3rd "{}" placeholders
# without a value and raises IndexError.
logdir_prefix = "{}_{}_{}".format(self._agent_name, self._env_id, timestr)
def default_logger_creator(config):
"""Creates a Unified logger with a default logdir prefix
@@ -256,6 +267,8 @@ class Agent(Trainable):
self._allow_unknown_configs,
self._allow_unknown_subkeys)
self.config = merged_config
if self.config.get("log_level"):
logging.getLogger("ray.rllib").setLevel(self.config["log_level"])
# TODO(ekl) setting the graph is unnecessary for PyTorch agents
with tf.Graph().as_default():
+11 -16
View File
@@ -7,6 +7,7 @@ from __future__ import division
from __future__ import print_function
from collections import namedtuple
import logging
import numpy as np
import time
@@ -16,14 +17,16 @@ from ray.tune.trial import Resources
from ray.rllib.agents.ars import optimizers
from ray.rllib.agents.ars import policies
from ray.rllib.agents.es import tabular_logger as tlogger
from ray.rllib.agents.ars import utils
logger = logging.getLogger(__name__)
Result = namedtuple("Result", [
"noise_indices", "noisy_returns", "sign_noisy_returns", "noisy_lengths",
"eval_returns", "eval_lengths"
])
# yapf: disable
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
"noise_stdev": 0.02, # std deviation of parameter noise
@@ -38,6 +41,7 @@ DEFAULT_CONFIG = with_common_config({
"offset": 0,
})
# __sphinx_doc_end__
# yapf: enable
@ray.remote
@@ -163,12 +167,12 @@ class ARSAgent(Agent):
self.report_length = self.config["report_length"]
# Create the shared noise table.
print("Creating shared noise table.")
logger.info("Creating shared noise table.")
noise_id = create_shared_noise.remote(self.config["noise_size"])
self.noise = SharedNoiseTable(ray.get(noise_id))
# Create the actors.
print("Creating actors.")
logger.info("Creating actors.")
self.workers = [
Worker.remote(self.config, self.env_creator, noise_id)
for _ in range(self.config["num_workers"])
@@ -182,8 +186,9 @@ class ARSAgent(Agent):
num_episodes, num_timesteps = 0, 0
results = []
while num_episodes < min_episodes:
print("Collected {} episodes {} timesteps so far this iter".format(
num_episodes, num_timesteps))
logger.info(
"Collected {} episodes {} timesteps so far this iter".format(
num_episodes, num_timesteps))
rollout_ids = [
worker.do_rollouts.remote(theta_id) for worker in self.workers
]
@@ -263,7 +268,6 @@ class ARSAgent(Agent):
g /= np.std(noisy_returns)
assert (g.shape == (self.policy.num_params, )
and g.dtype == np.float32)
print('the number of policy params is, ', self.policy.num_params)
# Compute the new weights theta.
theta, update_ratio = self.optimizer.update(-g)
# Set the new weights in the local copy of the policy.
@@ -272,18 +276,9 @@ class ARSAgent(Agent):
if len(all_eval_returns) > 0:
self.reward_list.append(eval_returns.mean())
tlogger.record_tabular("NoisyEpRewMean", noisy_returns.mean())
tlogger.record_tabular("NoisyEpRewStd", noisy_returns.std())
tlogger.record_tabular("NoisyEpLenMean", noisy_lengths.mean())
tlogger.record_tabular("WeightsNorm", float(np.square(theta).sum()))
tlogger.record_tabular("WeightsStd", float(np.std(theta)))
tlogger.record_tabular("Grad2Norm", float(np.sqrt(np.square(g).sum())))
tlogger.record_tabular("UpdateRatio", float(update_ratio))
tlogger.dump_tabular()
info = {
"weights_norm": np.square(theta).sum(),
"weights_std": np.std(theta),
"grad_norm": np.square(g).sum(),
"update_ratio": update_ratio,
"episodes_this_iter": noisy_lengths.size,
+2 -1
View File
@@ -13,6 +13,7 @@ OPTIMIZER_SHARED_CONFIGS = [
"train_batch_size", "learning_starts"
]
# yapf: disable
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# === Model ===
@@ -108,8 +109,8 @@ DEFAULT_CONFIG = with_common_config({
# Prevent iterations from going lower than this time span
"min_iter_time_s": 1,
})
# __sphinx_doc_end__
# yapf: enable
class DDPGAgent(DQNAgent):
+2 -1
View File
@@ -6,6 +6,7 @@ from ray.rllib.agents.dqn.dqn import DQNAgent, DEFAULT_CONFIG as DQN_CONFIG
from ray.rllib.utils import merge_dicts
from ray.tune.trial import Resources
# yapf: disable
# __sphinx_doc_begin__
APEX_DEFAULT_CONFIG = merge_dicts(
DQN_CONFIG, # see also the options in dqn.py, which are also supported
@@ -31,8 +32,8 @@ APEX_DEFAULT_CONFIG = merge_dicts(
"min_iter_time_s": 30,
},
)
# __sphinx_doc_end__
# yapf: enable
class ApexAgent(DQNAgent):
+2 -1
View File
@@ -20,6 +20,7 @@ OPTIMIZER_SHARED_CONFIGS = [
"learning_starts"
]
# yapf: disable
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# === Model ===
@@ -116,8 +117,8 @@ DEFAULT_CONFIG = with_common_config({
# Prevent iterations from going lower than this time span
"min_iter_time_s": 1,
})
# __sphinx_doc_end__
# yapf: enable
class DQNAgent(Agent):
+10 -20
View File
@@ -6,6 +6,7 @@ from __future__ import division
from __future__ import print_function
from collections import namedtuple
import logging
import numpy as np
import time
@@ -15,15 +16,17 @@ from ray.tune.trial import Resources
from ray.rllib.agents.es import optimizers
from ray.rllib.agents.es import policies
from ray.rllib.agents.es import tabular_logger as tlogger
from ray.rllib.agents.es import utils
from ray.rllib.utils import merge_dicts
logger = logging.getLogger(__name__)
Result = namedtuple("Result", [
"noise_indices", "noisy_returns", "sign_noisy_returns", "noisy_lengths",
"eval_returns", "eval_lengths"
])
# yapf: disable
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
"l2_coeff": 0.005,
@@ -39,6 +42,7 @@ DEFAULT_CONFIG = with_common_config({
"report_length": 10,
})
# __sphinx_doc_end__
# yapf: enable
@ray.remote
@@ -169,12 +173,12 @@ class ESAgent(Agent):
self.report_length = self.config["report_length"]
# Create the shared noise table.
print("Creating shared noise table.")
logger.info("Creating shared noise table.")
noise_id = create_shared_noise.remote(self.config["noise_size"])
self.noise = SharedNoiseTable(ray.get(noise_id))
# Create the actors.
print("Creating actors.")
logger.info("Creating actors.")
self.workers = [
Worker.remote(self.config, policy_params, self.env_creator,
noise_id) for _ in range(self.config["num_workers"])
@@ -188,8 +192,9 @@ class ESAgent(Agent):
num_episodes, num_timesteps = 0, 0
results = []
while num_episodes < min_episodes or num_timesteps < min_timesteps:
print("Collected {} episodes {} timesteps so far this iter".format(
num_episodes, num_timesteps))
logger.info(
"Collected {} episodes {} timesteps so far this iter".format(
num_episodes, num_timesteps))
rollout_ids = [
worker.do_rollouts.remote(theta_id) for worker in self.workers
]
@@ -269,21 +274,6 @@ class ESAgent(Agent):
if len(all_eval_returns) > 0:
self.reward_list.append(np.mean(eval_returns))
tlogger.record_tabular("EvalEpRewStd", eval_returns.std())
tlogger.record_tabular("EvalEpLenMean", eval_lengths.mean())
tlogger.record_tabular("EpRewMean", noisy_returns.mean())
tlogger.record_tabular("EpRewStd", noisy_returns.std())
tlogger.record_tabular("EpLenMean", noisy_lengths.mean())
tlogger.record_tabular("Norm", float(np.square(theta).sum()))
tlogger.record_tabular("GradNorm", float(np.square(g).sum()))
tlogger.record_tabular("UpdateRatio", float(update_ratio))
tlogger.record_tabular("EpisodesThisIter", noisy_lengths.size)
tlogger.record_tabular("EpisodesSoFar", self.episodes_so_far)
tlogger.dump_tabular()
info = {
"weights_norm": np.square(theta).sum(),
"grad_norm": np.square(g).sum(),
@@ -1,229 +0,0 @@
# Code in this file is copied and adapted from
# https://github.com/openai/evolution-strategies-starter.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import os
import sys
import time
import tensorflow as tf
from tensorflow.core.util import event_pb2
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.util import compat
# Numeric severity levels used by this module's logger. A message is emitted
# when the logger's threshold is <= the message's level (see _Logger.log),
# so a higher threshold means less output.
DEBUG = 10
INFO = 20
WARN = 30
ERROR = 40
# Higher than every level used by debug()/info()/warn()/error(), so using it
# as the threshold suppresses all of those messages.
DISABLED = 50
class TbWriter(object):
    """Writes scalar summaries to a TensorFlow events file.

    Based on SummaryWriter, but changed to allow for a different prefix.
    """

    def __init__(self, dir, prefix):
        """Open an events writer whose files start with ``dir/prefix``."""
        self.dir = dir
        # Start at 1, because EvWriter automatically generates an object
        # with step = 0.
        self.step = 1
        events_path = os.path.join(dir, prefix)
        self.evwriter = pywrap_tensorflow.EventsWriter(
            compat.as_bytes(events_path))

    def write_values(self, key2val):
        """Write one event holding every ``tag -> float(value)`` pair.

        The event is stamped with the current wall time and the current
        step; the writer is flushed and the step is then advanced by one.
        """
        scalars = []
        for tag, raw in key2val.items():
            scalars.append(
                tf.Summary.Value(tag=tag, simple_value=float(raw)))
        event = event_pb2.Event(
            wall_time=time.time(), summary=tf.Summary(value=scalars))
        event.step = self.step
        writer = self.evwriter
        writer.WriteEvent(event)
        writer.Flush()
        self.step += 1

    def close(self):
        """Close the underlying events writer."""
        self.evwriter.Close()
# API
def start(dir):
    """Make a new logger writing to ``dir`` the current logger.

    If a non-default logger is still active, a warning naming both
    directories is written to stderr before it is replaced.
    """
    active = _Logger.CURRENT
    if active is not _Logger.DEFAULT:
        warning = ("WARNING: You asked to start logging (dir=%s), but "
                   "you never stopped the previous logger (dir=%s)."
                   "\n" % (dir, active.dir))
        sys.stderr.write(warning)
    _Logger.CURRENT = _Logger(dir=dir)
def stop():
    """Close the current logger and fall back to the default logger.

    If no logger was started (the current logger is already the default
    one), a warning is written to stderr and nothing else happens.
    """
    if _Logger.CURRENT is _Logger.DEFAULT:
        # The message has no placeholders, so it must not be %-formatted
        # with arguments; doing so raised TypeError instead of warning.
        sys.stderr.write("WARNING: You asked to stop logging, but you never "
                         "started any previous logger.\n")
        return
    _Logger.CURRENT.close()
    _Logger.CURRENT = _Logger.DEFAULT
def record_tabular(key, val):
    """Record one diagnostic value on the current logger.

    Call this once for each diagnostic quantity, each iteration.
    """
    current = _Logger.CURRENT
    current.record_tabular(key, val)
def dump_tabular():
    """Have the current logger write out this iteration's diagnostics."""
    current = _Logger.CURRENT
    current.dump_tabular()
def log(*args, **kwargs):
    """Write the sequence of args, with no separators.

    The text goes to the console and to the output files (if an output
    file was configured). An optional ``level`` keyword sets the message
    level; it defaults to INFO.
    """
    level = kwargs.get('level', INFO)
    _Logger.CURRENT.log(*args, level=level)
def debug(*args):
    """Log ``args`` at DEBUG level."""
    options = {"level": DEBUG}
    log(*args, **options)
def info(*args):
    """Log ``args`` at INFO level."""
    options = {"level": INFO}
    log(*args, **options)
def warn(*args):
    """Log ``args`` at WARN level."""
    options = {"level": WARN}
    log(*args, **options)
def error(*args):
    """Log ``args`` at ERROR level."""
    options = {"level": ERROR}
    log(*args, **options)
def set_level(level):
    """Set the logging threshold on the current logger."""
    current = _Logger.CURRENT
    current.set_level(level)
def get_dir():
    """Return the directory that log files are being written to.

    This is None when there is no output directory (i.e., start() was
    never called).
    """
    current = _Logger.CURRENT
    return current.get_dir()
def get_expt_dir():
    """Deprecated alias of get_dir(); writes a notice to stderr first."""
    notice = "get_expt_dir() is Deprecated. Switch to get_dir()\n"
    sys.stderr.write(notice)
    return get_dir()
# Backend
class _Logger(object):
    """Backend for the module-level logging functions.

    Collects per-iteration key/value diagnostics and free-form text, and
    writes them to stdout and, when a directory is given, also to
    ``<dir>/log.txt`` and a TensorBoard events file.
    """

    # A logger with no output files. (See right below class definition) so
    # that you can still log to the terminal without setting up any output
    # files.
    DEFAULT = None
    # Current logger being used by the free functions above.
    CURRENT = None

    def __init__(self, dir=None):
        """Create a logger; ``dir`` (optional) is the output directory."""
        self.name2val = OrderedDict()  # Values this iteration.
        self.level = INFO
        self.dir = dir
        self.text_outputs = [sys.stdout]
        if dir is not None:
            # os.makedirs(..., exist_ok=True) is Python 3 only; tolerate an
            # already-existing directory by hand so Python 2 works too.
            try:
                os.makedirs(dir)
            except OSError:
                if not os.path.isdir(dir):
                    raise
            self.text_outputs.append(open(os.path.join(dir, "log.txt"), "w"))
            self.tbwriter = TbWriter(dir=dir, prefix="events")
        else:
            self.tbwriter = None

    # Logging API, forwarded
    def record_tabular(self, key, val):
        """Store ``val`` under ``key`` for the next dump_tabular() call."""
        self.name2val[key] = val

    def dump_tabular(self):
        """Write the recorded diagnostics as a text table, then clear them.

        The table goes to every text output; numeric values are also sent
        to the TensorBoard writer when one is configured. Calling this with
        nothing recorded is a no-op.
        """
        if not self.name2val:
            # Nothing recorded: max() below would fail on an empty table.
            return
        # Create strings for printing.
        key2str = OrderedDict()
        for (key, val) in self.name2val.items():
            if hasattr(val, "__float__"):
                valstr = "%-8.3g" % val
            else:
                # str() so values such as None can be measured and written.
                valstr = str(val)
            key2str[self._truncate(key)] = self._truncate(valstr)
        keywidth = max(map(len, key2str.keys()))
        valwidth = max(map(len, key2str.values()))
        # Write to all text outputs
        self._write_text("-" * (keywidth + valwidth + 7), "\n")
        for (key, val) in key2str.items():
            self._write_text("| ", key, " " * (keywidth - len(key)), " | ",
                             val, " " * (valwidth - len(val)), " |\n")
        self._write_text("-" * (keywidth + valwidth + 7), "\n")
        for f in self.text_outputs:
            try:
                f.flush()
            except OSError:
                sys.stderr.write('Warning! OSError when flushing.\n')
        # Write to tensorboard
        if self.tbwriter is not None:
            # The writer converts every value with float(), so forward only
            # the entries that support that conversion.
            numeric = OrderedDict(
                (key, val) for (key, val) in self.name2val.items()
                if hasattr(val, "__float__"))
            self.tbwriter.write_values(numeric)
        self.name2val.clear()

    def log(self, *args, **kwargs):
        """Write ``args`` if the message level passes the threshold.

        The optional ``level`` keyword defaults to INFO; the message is
        emitted when this logger's level is <= the message level.
        """
        level = kwargs['level'] if 'level' in kwargs else INFO
        if self.level <= level:
            self._do_log(*args)

    # Configuration
    def set_level(self, level):
        """Set the threshold used by log()."""
        self.level = level

    def get_dir(self):
        """Return the output directory, or None if there is none."""
        return self.dir

    def close(self):
        """Close the opened log files and the TensorBoard writer."""
        # text_outputs[0] is sys.stdout, which must stay open.
        for f in self.text_outputs[1:]:
            f.close()
        if self.tbwriter:
            self.tbwriter.close()

    # Misc
    def _do_log(self, *args):
        """Write ``args`` followed by a newline and flush every output."""
        self._write_text(*args + ('\n', ))
        for f in self.text_outputs:
            try:
                f.flush()
            except OSError:
                print('Warning! OSError when flushing.')

    def _write_text(self, *strings):
        """Write each string, in order, to every text output."""
        for f in self.text_outputs:
            for string in strings:
                f.write(string)

    def _truncate(self, s):
        """Shorten ``s`` to 30 characters plus "..." if longer than 33."""
        if len(s) > 33:
            return s[:30] + "..."
        else:
            return s
# Create the file-less default logger (no dir, so it only writes to stdout)
# and make it the current logger until start() installs another one.
_Logger.DEFAULT = _Logger()
_Logger.CURRENT = _Logger.DEFAULT
+2 -1
View File
@@ -23,6 +23,7 @@ OPTIMIZER_SHARED_CONFIGS = [
"max_sample_requests_in_flight_per_worker",
]
# yapf: disable
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# V-trace params (see vtrace.py).
@@ -65,8 +66,8 @@ DEFAULT_CONFIG = with_common_config({
"vf_loss_coeff": 0.5,
"entropy_coeff": -0.01,
})
# __sphinx_doc_end__
# yapf: enable
class ImpalaAgent(Agent):
+2 -1
View File
@@ -8,6 +8,7 @@ from ray.rllib.optimizers import SyncSamplesOptimizer
from ray.rllib.utils import merge_dicts
from ray.tune.trial import Resources
# yapf: disable
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# No remote workers by default
@@ -15,8 +16,8 @@ DEFAULT_CONFIG = with_common_config({
# Learning rate
"lr": 0.0004,
})
# __sphinx_doc_end__
# yapf: enable
class PGAgent(Agent):
+7 -2
View File
@@ -2,12 +2,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
from ray.rllib.agents import Agent, with_common_config
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
from ray.rllib.utils import merge_dicts
from ray.rllib.optimizers import SyncSamplesOptimizer, LocalMultiGPUOptimizer
from ray.tune.trial import Resources
logger = logging.getLogger(__name__)
# yapf: disable
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# If true, use the Generalized Advantage Estimator (GAE)
@@ -55,8 +60,8 @@ DEFAULT_CONFIG = with_common_config({
# Use the sync samples optimizer instead of the multi-gpu one
"simple_optimizer": False,
})
# __sphinx_doc_end__
# yapf: enable
class PPOAgent(Agent):
@@ -111,7 +116,7 @@ class PPOAgent(Agent):
if waste_ratio > 1.5:
raise ValueError(msg)
else:
print("Warning: " + msg)
logger.warn(msg)
if self.config["sgd_minibatch_size"] > self.config["train_batch_size"]:
raise ValueError(
"Minibatch size {} must be <= train batch size {}.".format(
@@ -3,6 +3,7 @@ from __future__ import division
from __future__ import print_function
import gym
import logging
import pickle
import tensorflow as tf
@@ -99,7 +100,8 @@ class PolicyEvaluator(EvaluatorInterface):
model_config=None,
policy_config=None,
worker_index=0,
monitor_path=None):
monitor_path=None,
log_level=None):
"""Initialize a policy evaluator.
Arguments:
@@ -158,8 +160,12 @@ class PolicyEvaluator(EvaluatorInterface):
through EnvContext so that envs can be configured per worker.
monitor_path (str): Write out episode stats and videos to this
directory if specified.
log_level (str): Set the root log level on creation.
"""
if log_level:
logging.getLogger("ray.rllib").setLevel(log_level)
env_context = EnvContext(env_config or {}, worker_index)
policy_config = policy_config or {}
self.policy_config = policy_config
+4 -1
View File
@@ -3,6 +3,7 @@ from __future__ import division
from __future__ import print_function
from collections import defaultdict, namedtuple
import logging
import numpy as np
import six.moves.queue as queue
import threading
@@ -16,6 +17,8 @@ from ray.rllib.env.atari_wrappers import get_wrapper_by_cls, MonitorEnv
from ray.rllib.models.action_dist import TupleActions
from ray.rllib.utils.tf_run_builder import TFRunBuilder
logger = logging.getLogger(__name__)
RolloutMetrics = namedtuple(
"RolloutMetrics", ["episode_length", "episode_reward", "agent_rewards"])
@@ -221,7 +224,7 @@ def _env_runner(async_vector_env,
horizon = (
async_vector_env.get_unwrapped()[0].spec.max_episode_steps)
except Exception:
print("*** WARNING ***: no episode horizon specified, assuming inf")
logger.warn("no episode horizon specified, assuming inf")
if not horizon:
horizon = float("inf")
+8 -4
View File
@@ -3,6 +3,7 @@ from __future__ import division
from __future__ import print_function
import gym
import logging
import numpy as np
import tensorflow as tf
from functools import partial
@@ -21,6 +22,9 @@ from ray.rllib.models.fcnet import FullyConnectedNetwork
from ray.rllib.models.visionnet import VisionNetwork
from ray.rllib.models.lstm import LSTM
logger = logging.getLogger(__name__)
# yapf: disable
# __sphinx_doc_begin__
MODEL_DEFAULTS = {
# === Built-in options ===
@@ -67,8 +71,8 @@ MODEL_DEFAULTS = {
# Extra options to pass to the custom classes
"custom_options": {},
}
# __sphinx_doc_end__
# yapf: enable
class ModelCatalog(object):
@@ -200,7 +204,7 @@ class ModelCatalog(object):
seq_lens):
if options.get("custom_model"):
model = options["custom_model"]
print("Using custom model {}".format(model))
logger.info("Using custom model {}".format(model))
return _global_registry.get(RLLIB_MODEL, model)(
input_dict,
obs_space,
@@ -238,7 +242,7 @@ class ModelCatalog(object):
options = options or MODEL_DEFAULTS
if options.get("custom_model"):
model = options["custom_model"]
print("Using custom torch model {}".format(model))
logger.info("Using custom torch model {}".format(model))
return _global_registry.get(RLLIB_MODEL, model)(
input_shape, num_outputs, options)
@@ -271,7 +275,7 @@ class ModelCatalog(object):
if options.get("custom_preprocessor"):
preprocessor = options["custom_preprocessor"]
print("Using custom preprocessor {}".format(preprocessor))
logger.info("Using custom preprocessor {}".format(preprocessor))
return _global_registry.get(RLLIB_PREPROCESSOR, preprocessor)(
env.observation_space, options)
+6 -2
View File
@@ -1,13 +1,17 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import logging
import numpy as np
import gym
ATARI_OBS_SHAPE = (210, 160, 3)
ATARI_RAM_OBS_SHAPE = (128, )
logger = logging.getLogger(__name__)
class Preprocessor(object):
"""Defines an abstract observation preprocessor function.
@@ -128,7 +132,7 @@ class TupleFlatteningPreprocessor(Preprocessor):
self.preprocessors = []
for i in range(len(self._obs_space.spaces)):
space = self._obs_space.spaces[i]
print("Creating sub-preprocessor for", space)
logger.info("Creating sub-preprocessor for {}".format(space))
preprocessor = get_preprocessor(space)(space, self._options)
self.preprocessors.append(preprocessor)
size += preprocessor.size
@@ -153,7 +157,7 @@ class DictFlatteningPreprocessor(Preprocessor):
size = 0
self.preprocessors = []
for space in self._obs_space.spaces.values():
print("Creating sub-preprocessor for", space)
logger.info("Creating sub-preprocessor for {}".format(space))
preprocessor = get_preprocessor(space)(space, self._options)
self.preprocessors.append(preprocessor)
size += preprocessor.size
+5 -1
View File
@@ -2,10 +2,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
from ray.rllib.models.pytorch.model import Model, SlimFC
from ray.rllib.models.pytorch.misc import normc_initializer
import torch.nn as nn
logger = logging.getLogger(__name__)
class FullyConnectedNetwork(Model):
"""TODO(rliaw): Logits, Value should both be contained here"""
@@ -19,7 +23,7 @@ class FullyConnectedNetwork(Model):
activation = nn.Tanh
elif fcnet_activation == "relu":
activation = nn.ReLU
print("Constructing fcnet {} {}".format(hiddens, activation))
logger.info("Constructing fcnet {} {}".format(hiddens, activation))
layers = []
last_layer_size = inputs
@@ -6,6 +6,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import random
import time
@@ -20,6 +21,8 @@ from ray.rllib.utils.actors import TaskPool
from ray.rllib.utils.timer import TimerStat
from ray.rllib.utils.window_stat import WindowStat
logger = logging.getLogger(__name__)
LEARNER_QUEUE_MAX_SIZE = 16
NUM_DATA_LOAD_THREADS = 16
@@ -84,7 +87,7 @@ class TFMultiGPULearner(LearnerThread):
self.devices = ["/cpu:0"]
else:
self.devices = ["/gpu:{}".format(i) for i in range(num_gpus)]
print("TFMultiGPULearner devices", self.devices)
logger.info("TFMultiGPULearner devices {}".format(self.devices))
assert self.train_batch_size % len(self.devices) == 0
assert self.train_batch_size >= len(self.devices), "batch too small"
self.policy = self.local_evaluator.policy_map["default"]
@@ -199,7 +202,7 @@ class AsyncSamplesOptimizer(PolicyOptimizer):
self.sample_batch_size = sample_batch_size
if num_gpus > 1 or num_parallel_data_loaders > 1:
print(
logger.info(
"Enabling multi-GPU mode, {} GPUs, {} parallel loaders".format(
num_gpus, num_parallel_data_loaders))
if train_batch_size // max(1, num_gpus) % (
@@ -2,6 +2,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
from collections import defaultdict
import tensorflow as tf
@@ -12,6 +13,8 @@ from ray.rllib.optimizers.policy_optimizer import PolicyOptimizer
from ray.rllib.optimizers.multi_gpu_impl import LocalSyncParallelOptimizer
from ray.rllib.utils.timer import TimerStat
logger = logging.getLogger(__name__)
class LocalMultiGPUOptimizer(PolicyOptimizer):
"""A synchronous optimizer that uses multiple local GPUs.
@@ -53,7 +56,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
self.update_weights_timer = TimerStat()
self.standardize_fields = standardize_fields
print("LocalMultiGPUOptimizer devices", self.devices)
logger.info("LocalMultiGPUOptimizer devices {}".format(self.devices))
if set(self.local_evaluator.policy_map.keys()) != {"default"}:
raise ValueError(
@@ -126,7 +129,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
with self.grad_timer:
num_batches = (
int(tuples_per_device) // int(self.per_device_batch_size))
print("== sgd epochs ==")
logger.debug("== sgd epochs ==")
for i in range(self.num_sgd_iter):
iter_extra_fetches = defaultdict(list)
permutation = np.random.permutation(num_batches)
@@ -136,7 +139,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
permutation[batch_index] * self.per_device_batch_size)
for k, v in batch_fetches.items():
iter_extra_fetches[k].append(v)
print(i, _averaged(iter_extra_fetches))
logger.debug("{} {}".format(i, _averaged(iter_extra_fetches)))
self.num_steps_sampled += samples.count
self.num_steps_trained += samples.count
@@ -2,11 +2,15 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import ray
from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator
from ray.rllib.evaluation.metrics import collect_episodes, summarize_episodes
from ray.rllib.evaluation.sample_batch import MultiAgentBatch
logger = logging.getLogger(__name__)
class PolicyOptimizer(object):
"""Policy optimizers encapsulate distributed RL optimization strategies.
@@ -3,11 +3,14 @@ from __future__ import division
from __future__ import print_function
import ray
import logging
from ray.rllib.optimizers.policy_optimizer import PolicyOptimizer
from ray.rllib.evaluation.sample_batch import SampleBatch
from ray.rllib.utils.filter import RunningStat
from ray.rllib.utils.timer import TimerStat
logger = logging.getLogger(__name__)
class SyncSamplesOptimizer(PolicyOptimizer):
"""A simple synchronous RL optimizer.
@@ -52,7 +55,7 @@ class SyncSamplesOptimizer(PolicyOptimizer):
if "stats" in fetches:
self.learner_stats = fetches["stats"]
if self.num_sgd_iter > 1:
print(i, fetches)
logger.debug("{} {}".format(i, fetches))
self.grad_timer.push_units_processed(samples.count)
self.num_steps_sampled += samples.count
+5 -1
View File
@@ -2,9 +2,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import os
import ray
logger = logging.getLogger(__name__)
class TaskPool(object):
"""Helper class for tracking the status of many in-flight actor tasks."""
@@ -80,11 +83,12 @@ def split_colocated(actors):
def try_create_colocated(cls, args, count):
actors = [cls.remote(*args) for _ in range(count)]
local, _ = split_colocated(actors)
print("Got {} colocated actors of {}".format(len(local), count))
logger.info("Got {} colocated actors of {}".format(len(local), count))
return local
def create_colocated(cls, args, count):
logger.info("Trying to create {} colocated actors".format(count))
ok = []
i = 1
while len(ok) < count and i < 10:
+6 -3
View File
@@ -2,18 +2,21 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import time
import base64
import numpy as np
import pyarrow
logger = logging.getLogger(__name__)
try:
import lz4.frame
LZ4_ENABLED = True
except ImportError:
print("WARNING: lz4 not available, disabling sample compression. "
"This will significantly impact RLlib performance. "
"To install lz4, run `pip install lz4`.")
logger.warn("lz4 not available, disabling sample compression. "
"This will significantly impact RLlib performance. "
"To install lz4, run `pip install lz4`.")
LZ4_ENABLED = False
+6 -4
View File
@@ -2,14 +2,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import pickle
logger = logging.getLogger(__name__)
try:
import requests # `requests` is not part of stdlib.
except ImportError:
requests = None
print("Couldn't import `requests` library. Be sure to install it on"
" the client side.")
logger.warn("Couldn't import `requests` library. Be sure to install it on"
" the client side.")
class PolicyClient(object):
@@ -109,8 +112,7 @@ class PolicyClient(object):
payload = pickle.dumps(data)
response = requests.post(self._address, data=payload)
if response.status_code != 200:
print("Request failed", data)
print(response.text)
logger.error("Request failed {}: {}".format(response.text, data))
response.raise_for_status()
parsed = pickle.loads(response.content)
return parsed
+6 -3
View File
@@ -2,12 +2,15 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import os
import time
import tensorflow as tf
from tensorflow.python.client import timeline
logger = logging.getLogger(__name__)
class TFRunBuilder(object):
"""Used to incrementally build up a TensorFlow run.
@@ -43,7 +46,7 @@ class TFRunBuilder(object):
self.session, self.fetches, self.debug_name,
self.feed_dict, os.environ.get("TF_TIMELINE_DIR"))
except Exception as e:
print("Error fetching: {}, feed_dict={}".format(
logger.error("Error fetching: {}, feed_dict={}".format(
self.fetches, self.feed_dict))
raise e
if isinstance(to_fetch, int):
@@ -76,8 +79,8 @@ def run_timeline(sess, ops, debug_name, feed_dict={}, timeline_dir=None):
debug_name, os.getpid(), _count))
_count += 1
trace_file = open(outf, "w")
print("Wrote tf timeline ({} s) to {}".format(time.time() - start,
os.path.abspath(outf)))
logger.info("Wrote tf timeline ({} s) to {}".format(
time.time() - start, os.path.abspath(outf)))
trace_file.write(trace.generate_chrome_trace_format())
else:
fetches = sess.run(ops, feed_dict=feed_dict)