From 30bf8e46c78c405b363a31002544deb75cc224e4 Mon Sep 17 00:00:00 2001
From: Eric Liang <ekhliang@gmail.com>
Date: Mon, 4 Mar 2019 18:29:22 -0800
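Subject: [PATCH] [rllib] Use nested scope in custom loss example

Build the FullyConnectedNetwork inside a nested
tf.variable_scope("shared", reuse=tf.AUTO_REUSE) within
_build_layers_v2(), so that custom_loss() can call _build_layers_v2()
directly instead of re-entering self.scope with
auxiliary_name_scope=False. Update the InputReader docstring example to
match.

Also:
- Point the example's default data path at
  ../tests/data/cartpole_small (was ../test/data/cartpole_small).
- Run examples/custom_loss.py --iters=2 in the RLlib Jenkins tests.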
---
 ci/jenkins_tests/run_rllib_tests.sh      |  3 +++
 python/ray/rllib/examples/custom_loss.py | 17 ++++++++---------
 python/ray/rllib/offline/input_reader.py |  9 +++------
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/ci/jenkins_tests/run_rllib_tests.sh b/ci/jenkins_tests/run_rllib_tests.sh
index a110dac12..365902102 100644
--- a/ci/jenkins_tests/run_rllib_tests.sh
+++ b/ci/jenkins_tests/run_rllib_tests.sh
@@ -353,6 +353,9 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     /ray/python/ray/rllib/tests/run_silent.sh examples/cartpole_lstm.py --stop=200 --use-prev-action-reward
 
+docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    /ray/python/ray/rllib/tests/run_silent.sh examples/custom_loss.py --iters=2
+
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     /ray/python/ray/rllib/tests/run_silent.sh examples/custom_metrics_and_callbacks.py --num-iters=2
 
diff --git a/python/ray/rllib/examples/custom_loss.py b/python/ray/rllib/examples/custom_loss.py
index 4c0a21034..85855992c 100644
--- a/python/ray/rllib/examples/custom_loss.py
+++ b/python/ray/rllib/examples/custom_loss.py
@@ -31,7 +31,7 @@ parser.add_argument(
     type=str,
     default=os.path.join(
         os.path.dirname(os.path.abspath(__file__)),
-        "../test/data/cartpole_small"))
+        "../tests/data/cartpole_small"))
 
 
 class CustomLossModel(Model):
@@ -39,8 +39,9 @@ class CustomLossModel(Model):
 
     def _build_layers_v2(self, input_dict, num_outputs, options):
         self.obs_in = input_dict["obs"]
-        self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space,
-                                           num_outputs, options)
+        with tf.variable_scope("shared", reuse=tf.AUTO_REUSE):
+            self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space,
+                                               num_outputs, options)
         return self.fcnet.outputs, self.fcnet.last_layer
 
     def custom_loss(self, policy_loss, loss_inputs):
@@ -49,12 +50,10 @@ class CustomLossModel(Model):
         input_ops = reader.tf_input_ops()
 
         # define a secondary loss by building a graph copy with weight sharing
-        with tf.variable_scope(
-                self.scope, reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
-            logits, _ = self._build_layers_v2({
-                "obs": restore_original_dimensions(input_ops["obs"],
-                                                   self.obs_space)
-            }, self.num_outputs, self.options)
+        logits, _ = self._build_layers_v2({
+            "obs": restore_original_dimensions(input_ops["obs"],
+                                               self.obs_space)
+        }, self.num_outputs, self.options)
 
         # You can also add self-supervised losses easily by referencing tensors
         # created during _build_layers_v2(). For example, an autoencoder-style
diff --git a/python/ray/rllib/offline/input_reader.py b/python/ray/rllib/offline/input_reader.py
index 0e325b776..bb4fe9116 100644
--- a/python/ray/rllib/offline/input_reader.py
+++ b/python/ray/rllib/offline/input_reader.py
@@ -45,12 +45,9 @@ class InputReader(object):
             ...     def custom_loss(self, policy_loss, loss_inputs):
             ...         reader = JsonReader(...)
             ...         input_ops = reader.tf_input_ops()
-            ...         with tf.variable_scope(
-            ...                 self.scope, reuse=tf.AUTO_REUSE,
-            ...                 auxiliary_name_scope=False):
-            ...             logits, _ = self._build_layers_v2(
-            ...                 {"obs": input_ops["obs"]},
-            ...                 self.num_outputs, self.options)
+            ...         logits, _ = self._build_layers_v2(
+            ...             {"obs": input_ops["obs"]},
+            ...             self.num_outputs, self.options)
             ...         il_loss = imitation_loss(logits, input_ops["action"])
             ...         return policy_loss + il_loss