From 62cbc043b4ce2365a7859e3932d22a17f75e1b6c Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Fri, 15 Nov 2019 08:45:44 -0800 Subject: [PATCH] [tune] tbx logger (#6133) * tbx * add_hparams * fix_hparams * ok * ok * fix * ok * fix --- doc/source/tune-usage.rst | 18 ++++++++- docker/examples/Dockerfile | 2 +- docker/tune_test/Dockerfile | 2 +- python/ray/tune/logger.py | 57 ++++++++++++++++++++++++++++ python/ray/tune/tests/test_logger.py | 10 ++++- 5 files changed, 85 insertions(+), 4 deletions(-) diff --git a/doc/source/tune-usage.rst b/doc/source/tune-usage.rst index c5d2f42ec..ffc96852e 100644 --- a/doc/source/tune-usage.rst +++ b/doc/source/tune-usage.rst @@ -598,13 +598,29 @@ You can pass in your own logging mechanisms to output logs in custom formats as from ray.tune.logger import DEFAULT_LOGGERS tune.run( - MyTrainableClass + MyTrainableClass, name="experiment_name", loggers=DEFAULT_LOGGERS + (CustomLogger1, CustomLogger2) ) These loggers will be called along with the default Tune loggers. All loggers must inherit the `Logger interface `__. Tune enables default loggers for Tensorboard, CSV, and JSON formats. You can also check out `logger.py `__ for implementation details. An example can be found in `logging_example.py `__. +.. warning:: If you run into issues with TensorBoard logging, consider using the TensorBoardX Logger (``from ray.tune.logger import TBXLogger``). + +TBXLogger (TensorBoardX) +~~~~~~~~~~~~~~~~~~~~~~~~ + +Tune provides a logger using `TensorBoardX <https://github.com/lanpa/tensorboardX>`_. You can install tensorboardX via ``pip install tensorboardX``. This logger automatically outputs logs similar to the default TensorFlow logging format and is useful if you are in the middle of a TF1 to TF2 transition. By default, it will log any scalar value provided via the result dictionary along with HParams information. + +.. 
code-block:: python
+
+    from ray.tune.logger import TBXLogger
+
+    tune.run(
+        MyTrainableClass,
+        name="experiment_name",
+        loggers=[TBXLogger]
+    )

 MLFlow
 ~~~~~~
diff --git a/docker/examples/Dockerfile b/docker/examples/Dockerfile
index eb5ec300d..0bced2ccc 100644
--- a/docker/examples/Dockerfile
+++ b/docker/examples/Dockerfile
@@ -14,6 +14,6 @@ RUN pip install -U h5py # Mutes FutureWarnings
 RUN pip install --upgrade bayesian-optimization
 RUN pip install --upgrade hyperopt==0.1.2
 RUN pip install ConfigSpace==0.4.10
-RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster lightgbm xgboost torch torchvision
+RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster lightgbm xgboost torch torchvision tensorboardX
 RUN pip install -U tabulate mlflow
 RUN pip install -U pytest-remotedata>=0.3.1
diff --git a/docker/tune_test/Dockerfile b/docker/tune_test/Dockerfile
index a3e702d77..3cf8ae527 100644
--- a/docker/tune_test/Dockerfile
+++ b/docker/tune_test/Dockerfile
@@ -17,7 +17,7 @@ RUN pip install gym[atari]==0.10.11 opencv-python-headless tensorflow lz4 keras
 RUN pip install --upgrade bayesian-optimization
 RUN pip install --upgrade hyperopt==0.1.2
 RUN pip install ConfigSpace==0.4.10
-RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster lightgbm xgboost
+RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster lightgbm xgboost tensorboardX
 RUN pip install -U mlflow
 RUN pip install -U pytest-remotedata>=0.3.1

diff --git a/python/ray/tune/logger.py b/python/ray/tune/logger.py
index c672bdbcc..aa70cf76e 100644
--- a/python/ray/tune/logger.py
+++ b/python/ray/tune/logger.py
@@ -316,6 +316,94 @@ class CSVLogger(Logger):
         self._file.close()


+class TBXLogger(Logger):
+    """TensorBoardX Logger.
+
+    Writes each result's values whose type is in VALID_SUMMARY_TYPES as
+    scalars through a tensorboardX ``SummaryWriter`` and, on close, adds an
+    HParams summary built from the trial's evaluated parameters and the
+    last logged values.
+
+    Automatically flattens nested dicts to show on TensorBoard:
+
+        {"a": {"b": 1, "c": 2}} -> {"a/b": 1, "a/c": 2}
+    """
+
+    def _init(self):
+        """Create the writer for ``self.logdir``.
+
+        Raises:
+            ImportError: If tensorboardX cannot be imported.
+        """
+        try:
+            from tensorboardX import SummaryWriter
+        except ImportError:
+            logger.error("pip install tensorboardX to see TensorBoard files.")
+            raise
+        self._file_writer = SummaryWriter(self.logdir, flush_secs=30)
+        # Values written by the latest on_result(); close() passes them to
+        # hparams() as the metric dict.
+        self.last_result = None
+
+    def on_result(self, result):
+        """Write the loggable values of ``result`` as scalars.
+
+        Args:
+            result (dict): Training result. TRAINING_ITERATION must be
+                present whenever TIMESTEPS_TOTAL is missing or falsy.
+        """
+        # `or` also falls back to the iteration count for 0 timesteps.
+        step = result.get(TIMESTEPS_TOTAL) or result[TRAINING_ITERATION]
+
+        tmp = result.copy()
+        for k in [
+                "config", "pid", "timestamp", TIME_TOTAL_S, TRAINING_ITERATION
+        ]:
+            tmp.pop(k, None)  # not useful to log these
+
+        flat_result = flatten_dict(tmp, delimiter="/")
+        path = ["ray", "tune"]
+        valid_result = {
+            "/".join(path + [attr]): value
+            for attr, value in flat_result.items()
+            if type(value) in VALID_SUMMARY_TYPES
+        }
+
+        for attr, value in valid_result.items():
+            self._file_writer.add_scalar(attr, value, global_step=step)
+        self.last_result = valid_result
+        self._file_writer.flush()
+
+    def flush(self):
+        """Flush the underlying writer."""
+        if self._file_writer is not None:
+            self._file_writer.flush()
+
+    def close(self):
+        """Add the HParams summary, if available, then close the writer.
+
+        The writer is closed even if building or adding the HParams summary
+        raises; the exception still propagates to the caller.
+        """
+        if self._file_writer is not None:
+            try:
+                if (self.trial and self.trial.evaluated_params
+                        and self.last_result):
+                    self._log_hparams()
+            finally:
+                self._file_writer.close()
+
+    def _log_hparams(self):
+        """Add the experiment/session-start/session-end HParams summaries."""
+        from tensorboardX.summary import hparams
+        experiment_tag, session_start_tag, session_end_tag = hparams(
+            hparam_dict=self.trial.evaluated_params,
+            metric_dict=self.last_result)
+        self._file_writer.file_writer.add_summary(experiment_tag)
+        self._file_writer.file_writer.add_summary(session_start_tag)
+        self._file_writer.file_writer.add_summary(session_end_tag)
+
+
 DEFAULT_LOGGERS = (JsonLogger, CSVLogger, tf2_compat_logger)


diff --git a/python/ray/tune/tests/test_logger.py b/python/ray/tune/tests/test_logger.py
index 59bc4d288..0ead18b96 100644
--- a/python/ray/tune/tests/test_logger.py
+++ b/python/ray/tune/tests/test_logger.py
@@ -7,7 +7,7 @@ import unittest
 import tempfile
 import shutil

-from ray.tune.logger import tf2_compat_logger, JsonLogger, CSVLogger
+from ray.tune.logger import tf2_compat_logger, JsonLogger, CSVLogger, TBXLogger

 Trial = namedtuple("MockTrial", ["evaluated_params", "trial_id"])

@@ -54,6 +54,14 @@ class LoggerSuite(unittest.TestCase):
         logger.on_result(result(2, 4))
         logger.close()

+    def testTBX(self):
+        config = {"a": 2, "b": 5}
+        t = Trial(evaluated_params=config, trial_id="tbx")
+        logger = TBXLogger(config=config, logdir=self.test_dir, trial=t)
+        logger.on_result(result(2, 4))
+        logger.on_result(result(2, 4))
+        logger.close()
+

 if __name__ == "__main__":
     unittest.main(verbosity=2)