From 43df405d07516ba4be8d1a7686c090fde2184b0b Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Wed, 7 Nov 2018 14:54:28 -0800 Subject: [PATCH] [rllib] Add some debug logs during agent setup (#3247) --- python/ray/rllib/agents/agent.py | 5 +++++ python/ray/rllib/evaluation/policy_evaluator.py | 5 +++++ python/ray/rllib/evaluation/tf_policy_graph.py | 6 ++++++ python/ray/rllib/models/catalog.py | 13 ++++++++++--- python/ray/rllib/models/preprocessors.py | 4 ++-- python/ray/rllib/optimizers/policy_optimizer.py | 3 +++ 6 files changed, 31 insertions(+), 5 deletions(-) diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py index 3ceb358c5..54e44843d 100644 --- a/python/ray/rllib/agents/agent.py +++ b/python/ray/rllib/agents/agent.py @@ -20,6 +20,8 @@ from ray.tune.trainable import Trainable from ray.tune.logger import UnifiedLogger from ray.tune.result import DEFAULT_RESULTS_DIR +logger = logging.getLogger(__name__) + # yapf: disable # __sphinx_doc_begin__ COMMON_CONFIG = { @@ -252,6 +254,7 @@ class Agent(Trainable): self.optimizer.local_evaluator.set_global_vars(self.global_vars) for ev in self.optimizer.remote_evaluators: ev.set_global_vars.remote(self.global_vars) + logger.debug("updated global vars: {}".format(self.global_vars)) if (self.config.get("observation_filter", "NoFilter") != "NoFilter" and hasattr(self, "local_evaluator")): @@ -259,6 +262,8 @@ class Agent(Trainable): self.local_evaluator.filters, self.remote_evaluators, update_remote=self.config["synchronize_filters"]) + logger.debug("synchronized filters: {}".format( + self.local_evaluator.filters)) return Trainable.train(self) diff --git a/python/ray/rllib/evaluation/policy_evaluator.py b/python/ray/rllib/evaluation/policy_evaluator.py index 4120e6b4d..3344ce464 100644 --- a/python/ray/rllib/evaluation/policy_evaluator.py +++ b/python/ray/rllib/evaluation/policy_evaluator.py @@ -24,6 +24,8 @@ from ray.rllib.utils.compression import pack from ray.rllib.utils.filter import get_filter from ray.rllib.utils.tf_run_builder import TFRunBuilder +logger = logging.getLogger(__name__) + class PolicyEvaluator(EvaluatorInterface): """Common ``PolicyEvaluator`` implementation that wraps a ``PolicyGraph``. @@ -301,6 +303,9 @@ class PolicyEvaluator(EvaluatorInterface): pack=pack_episodes, tf_sess=self.tf_sess) + logger.debug("Created evaluator with env {} ({}), policies {}".format( + self.async_env, self.env, self.policy_map)) + def _build_policy_map(self, policy_dict, policy_config): policy_map = {} for name, (cls, obs_space, act_space, diff --git a/python/ray/rllib/evaluation/tf_policy_graph.py b/python/ray/rllib/evaluation/tf_policy_graph.py index bea53384b..c3fcf73d6 100644 --- a/python/ray/rllib/evaluation/tf_policy_graph.py +++ b/python/ray/rllib/evaluation/tf_policy_graph.py @@ -2,6 +2,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import logging import tensorflow as tf import numpy as np @@ -11,6 +12,8 @@ from ray.rllib.models.lstm import chop_into_sequences from ray.rllib.utils.tf_run_builder import TFRunBuilder from ray.rllib.utils.schedules import ConstantSchedule, PiecewiseSchedule +logger = logging.getLogger(__name__) + class TFPolicyGraph(PolicyGraph): """An agent policy and loss implemented in TensorFlow. @@ -116,6 +119,9 @@ class TFPolicyGraph(PolicyGraph): raise ValueError( "seq_lens tensor must be given if state inputs are defined") + logger.debug("Created {} with loss inputs: {}".format( + self, self._loss_input_dict)) + def build_compute_actions(self, builder, obs_batch, diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py index 2bb6d2823..4ffecf8b4 100644 --- a/python/ray/rllib/models/catalog.py +++ b/python/ray/rllib/models/catalog.py @@ -203,6 +203,9 @@ class ModelCatalog(object): model = LSTM(copy, obs_space, num_outputs, options, state_in, seq_lens) + logger.debug("Created model {}: ({} of {}, {}, {}) -> {}, {}".format( + model, input_dict, obs_space, state_in, seq_lens, model.outputs, + model.state_out)) return model @staticmethod @@ -282,11 +285,15 @@ class ModelCatalog(object): if options.get("custom_preprocessor"): preprocessor = options["custom_preprocessor"] logger.info("Using custom preprocessor {}".format(preprocessor)) - return _global_registry.get(RLLIB_PREPROCESSOR, preprocessor)( + prep = _global_registry.get(RLLIB_PREPROCESSOR, preprocessor)( env.observation_space, options) + else: + cls = get_preprocessor(env.observation_space) + prep = cls(env.observation_space, options) - preprocessor = get_preprocessor(env.observation_space) - return preprocessor(env.observation_space, options) + logger.debug("Created preprocessor {}: {} -> {}".format( + prep, env.observation_space, prep.shape)) + return prep @staticmethod def get_preprocessor_as_wrapper(env, options=None): diff --git a/python/ray/rllib/models/preprocessors.py b/python/ray/rllib/models/preprocessors.py index 8144b5706..074fda29b 100644 --- a/python/ray/rllib/models/preprocessors.py +++ b/python/ray/rllib/models/preprocessors.py @@ -132,7 +132,7 @@ class TupleFlatteningPreprocessor(Preprocessor): self.preprocessors = [] for i in range(len(self._obs_space.spaces)): space = self._obs_space.spaces[i] - logger.info("Creating sub-preprocessor for {}".format(space)) + logger.debug("Creating sub-preprocessor for {}".format(space)) preprocessor = get_preprocessor(space)(space, self._options) self.preprocessors.append(preprocessor) size += preprocessor.size @@ -157,7 +157,7 @@ class DictFlatteningPreprocessor(Preprocessor): size = 0 self.preprocessors = [] for space in self._obs_space.spaces.values(): - logger.info("Creating sub-preprocessor for {}".format(space)) + logger.debug("Creating sub-preprocessor for {}".format(space)) preprocessor = get_preprocessor(space)(space, self._options) self.preprocessors.append(preprocessor) size += preprocessor.size diff --git a/python/ray/rllib/optimizers/policy_optimizer.py b/python/ray/rllib/optimizers/policy_optimizer.py index 4979c15dd..3a0d3cf80 100644 --- a/python/ray/rllib/optimizers/policy_optimizer.py +++ b/python/ray/rllib/optimizers/policy_optimizer.py @@ -57,6 +57,9 @@ class PolicyOptimizer(object): self.num_steps_trained = 0 self.num_steps_sampled = 0 + logger.debug("Created policy optimizer with {}: {}".format( + config, self)) + def _init(self): """Subclasses should prefer overriding this instead of __init__."""