From 2dd0beb5bd7d0f77f4cdfb15fe40de6f1ac5c62e Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Wed, 29 May 2019 18:17:14 -0700 Subject: [PATCH] [rllib] Allow access to batches prior to postprocessing (#4871) --- doc/source/rllib-algorithms.rst | 4 ++++ doc/source/rllib-models.rst | 4 ++++ python/ray/rllib/agents/trainer.py | 10 ++++++++-- python/ray/rllib/evaluation/sample_batch_builder.py | 8 +++++++- .../ray/rllib/examples/custom_metrics_and_callbacks.py | 2 +- 5 files changed, 24 insertions(+), 4 deletions(-) diff --git a/doc/source/rllib-algorithms.rst b/doc/source/rllib-algorithms.rst index 5a07280e3..a9291bc4a 100644 --- a/doc/source/rllib-algorithms.rst +++ b/doc/source/rllib-algorithms.rst @@ -101,6 +101,10 @@ Tuned examples: `PongNoFrameskip-v4 `__): +.. warning:: + + Keras custom models are not compatible with multi-GPU (this includes PPO in single-GPU mode). This is because the multi-GPU implementation in RLlib relies on variable scopes to implement cross-GPU support. + .. literalinclude:: ../../python/ray/rllib/agents/ppo/appo.py :language: python :start-after: __sphinx_doc_begin__ diff --git a/doc/source/rllib-models.rst b/doc/source/rllib-models.rst index cdf42ea22..6a05e5b1c 100644 --- a/doc/source/rllib-models.rst +++ b/doc/source/rllib-models.rst @@ -35,6 +35,10 @@ Custom Models (TensorFlow) Custom TF models should subclass the common RLlib `model class `__ and override the ``_build_layers_v2`` method. This method takes in a dict of tensor inputs (the observation ``obs``, ``prev_action``, and ``prev_reward``, ``is_training``), and returns a feature layer and float vector of the specified output size. You can also override the ``value_function`` method to implement a custom value branch. Additional supervised / self-supervised losses can be added via the ``custom_loss`` method. The model can then be registered and used in place of a built-in model: +.. 
warning:: + + Keras custom models are not compatible with multi-GPU (this includes PPO in single-GPU mode). This is because the multi-GPU implementation in RLlib relies on variable scopes to implement cross-GPU support. + .. code-block:: python import ray diff --git a/python/ray/rllib/agents/trainer.py b/python/ray/rllib/agents/trainer.py index 83b00a896..4294affb1 100644 --- a/python/ray/rllib/agents/trainer.py +++ b/python/ray/rllib/agents/trainer.py @@ -54,14 +54,20 @@ COMMON_CONFIG = { # Callbacks that will be run during various phases of training. These all # take a single "info" dict as an argument. For episode callbacks, custom # metrics can be attached to the episode by updating the episode object's - # custom metrics dict (see examples/custom_metrics_and_callbacks.py). + # custom metrics dict (see examples/custom_metrics_and_callbacks.py). You + # may also mutate the passed-in batch data in your callback. "callbacks": { "on_episode_start": None, # arg: {"env": .., "episode": ...} "on_episode_step": None, # arg: {"env": .., "episode": ...} "on_episode_end": None, # arg: {"env": .., "episode": ...} "on_sample_end": None, # arg: {"samples": .., "evaluator": ...} "on_train_result": None, # arg: {"trainer": ..., "result": ...} - "on_postprocess_traj": None, # arg: {"batch": ..., "episode": ...} + "on_postprocess_traj": None, # arg: { + # "agent_id": ..., "episode": ..., + # "pre_batch": (before processing), + # "post_batch": (after processing), + # "all_pre_batches": (all agents' pre-batches, by agent id), + # } }, # Whether to attempt to continue training if a worker crashes. 
"ignore_worker_failures": False, diff --git a/python/ray/rllib/evaluation/sample_batch_builder.py b/python/ray/rllib/evaluation/sample_batch_builder.py index 0ead77d52..e82ca7357 100644 --- a/python/ray/rllib/evaluation/sample_batch_builder.py +++ b/python/ray/rllib/evaluation/sample_batch_builder.py @@ -165,7 +165,13 @@ class MultiAgentSampleBatchBuilder(object): self.policy_builders[self.agent_to_policy[agent_id]].add_batch( post_batch) if self.postp_callback: - self.postp_callback({"episode": episode, "batch": post_batch}) + self.postp_callback({ + "episode": episode, + "agent_id": agent_id, + "pre_batch": pre_batches[agent_id], + "post_batch": post_batch, + "all_pre_batches": pre_batches, + }) self.agent_builders.clear() self.agent_to_policy.clear() diff --git a/python/ray/rllib/examples/custom_metrics_and_callbacks.py b/python/ray/rllib/examples/custom_metrics_and_callbacks.py index 27d91331f..ba7795bf0 100644 --- a/python/ray/rllib/examples/custom_metrics_and_callbacks.py +++ b/python/ray/rllib/examples/custom_metrics_and_callbacks.py @@ -46,7 +46,7 @@ def on_train_result(info): def on_postprocess_traj(info): episode = info["episode"] - batch = info["batch"] + batch = info["post_batch"] print("postprocessed {} steps".format(batch.count)) if "num_batches" not in episode.custom_metrics: episode.custom_metrics["num_batches"] = 0