From 7d28bbbdbb058d186db4fa09f10c890cc8884f8b Mon Sep 17 00:00:00 2001
From: Eric Liang <ekhliang@gmail.com>
Date: Sat, 24 Aug 2019 20:37:45 -0700
Subject: [PATCH] [rllib] Document on traj postprocess (#5532)

* document on traj postprocess

* shorten it
---
 .github/PULL_REQUEST_TEMPLATE.md |  6 +-----
 doc/source/rllib-training.rst    | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index d6d04c553..a29de5acd 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -2,11 +2,7 @@
 
 ## Why are these changes needed?
 
-<!-- Please give a short summary of the problem these changes address. -->
-
-## What do these changes do?
-
-<!-- Please give a short summary of these changes. -->
+<!-- Please give a short summary of the change and the problem this solves. -->
 
 ## Related issue number
 
diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst
index 0d0a359fc..889d7bf59 100644
--- a/doc/source/rllib-training.rst
+++ b/doc/source/rllib-training.rst
@@ -259,6 +259,11 @@ You can provide callback functions to be called at points during policy evaluati
         print("trainer.train() result: {} -> {} episodes".format(
             info["trainer"].__name__, info["result"]["episodes_this_iter"]))
 
+    def on_postprocess_traj(info):
+        episode = info["episode"]
+        batch = info["post_batch"]  # note: you can mutate this
+        print("postprocessed {} steps".format(batch.count))
+
     ray.init()
     analysis = tune.run(
         "PG",
@@ -269,14 +274,25 @@ You can provide callback functions to be called at points during policy evaluati
                 "on_episode_step": tune.function(on_episode_step),
                 "on_episode_end": tune.function(on_episode_end),
                 "on_train_result": tune.function(on_train_result),
+                "on_postprocess_traj": tune.function(on_postprocess_traj),
             },
         },
     )
 
+Visualizing Custom Metrics
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 Custom metrics can be accessed and visualized like any other training result:
 
 .. image:: custom_metric.png
 
+Rewriting Trajectories
+~~~~~~~~~~~~~~~~~~~~~~
+
+Note that in the ``on_postprocess_traj`` callback you have full access to the trajectory batch (``post_batch``) and other training state. This can be used to rewrite the trajectory, which has a number of uses including:
+ * Backdating rewards to previous time steps (e.g., based on values in ``info``).
+ * Adding model-based curiosity bonuses to rewards (you can train the model with a `custom model supervised loss <rllib-models.html#supervised-model-losses>`__).
+
 Example: Curriculum Learning
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~