From 80e8426b5ee33978e9986ce1911ec731e0f9559f Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Sun, 16 Jul 2017 11:51:33 -0700
Subject: [PATCH] Test example applications and rllib in Jenkins tests. (#707)

* Test example applications in Jenkins.

* Fix default upload_dir argument for Algorithm class.

* Fix evolution strategies.

* Comment out the policy gradient example, which doesn't seem to work.

* Set --env-name for evolution strategies.
---
 docker/examples/Dockerfile                    |  2 +-
 examples/rl_pong/driver.py                    |  8 +++--
 python/ray/rllib/a3c/example.py               |  8 +++--
 python/ray/rllib/common.py                    |  3 +-
 .../ray/rllib/evolution_strategies/example.py | 12 ++++---
 python/ray/rllib/policy_gradient/example.py   |  6 +++-
 test/jenkins_tests/run_multi_node_tests.sh    | 31 ++++++++++++++++++-
 7 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/docker/examples/Dockerfile b/docker/examples/Dockerfile
index 854649f4b..84a273267 100644
--- a/docker/examples/Dockerfile
+++ b/docker/examples/Dockerfile
@@ -3,4 +3,4 @@
 FROM ray-project/deploy
 RUN conda install -y -c conda-forge tensorflow
 RUN apt-get install -y zlib1g-dev
-RUN pip install gym[atari]
+RUN pip install gym[atari] opencv-python smart_open
diff --git a/examples/rl_pong/driver.py b/examples/rl_pong/driver.py
index 1e4f9db52..5bf784092 100644
--- a/examples/rl_pong/driver.py
+++ b/examples/rl_pong/driver.py
@@ -150,7 +150,9 @@ if __name__ == "__main__":
                         help="The number of rollouts to do per batch.")
     parser.add_argument("--redis-address", default=None, type=str,
                         help="The Redis address of the cluster.")
-
+    parser.add_argument("--iterations", default=-1, type=int,
+                        help="The number of model updates to perform. By "
+                             "default, training will not terminate.")
     args = parser.parse_args()
     batch_size = args.batch_size
 
@@ -169,7 +171,9 @@ if __name__ == "__main__":
     # Update the rmsprop memory.
     rmsprop_cache = {k: np.zeros_like(v) for k, v in model.items()}
     actors = [PongEnv.remote() for _ in range(batch_size)]
-    while True:
+    iteration = 0
+    while iteration != args.iterations:
+        iteration += 1
         model_id = ray.put(model)
         actions = []
         # Launch tasks to compute gradients from multiple rollouts in parallel.
diff --git a/python/ray/rllib/a3c/example.py b/python/ray/rllib/a3c/example.py
index 47fc3d777..de66693e1 100755
--- a/python/ray/rllib/a3c/example.py
+++ b/python/ray/rllib/a3c/example.py
@@ -17,7 +17,9 @@ if __name__ == "__main__":
     parser.add_argument("--redis-address", default=None, type=str,
                         help="The Redis address of the cluster.")
     parser.add_argument("--num-workers", default=4, type=int,
-                        help="The number of A3C workers to use>")
+                        help="The number of A3C workers to use.")
+    parser.add_argument("--iterations", default=-1, type=int,
+                        help="The number of training iterations to run.")
 
     args = parser.parse_args()
     ray.init(redis_address=args.redis_address, num_cpus=args.num_workers)
@@ -27,6 +29,8 @@ if __name__ == "__main__":
 
     a3c = A3C(args.environment, config)
 
-    while True:
+    iteration = 0
+    while iteration != args.iterations:
+        iteration += 1
         res = a3c.train()
         print("current status: {}".format(res))
diff --git a/python/ray/rllib/common.py b/python/ray/rllib/common.py
index 3b01a7de8..de0c57d0a 100644
--- a/python/ray/rllib/common.py
+++ b/python/ray/rllib/common.py
@@ -67,7 +67,7 @@ class Algorithm(object):
     TODO(ekl): support checkpoint / restore of training state.
     """
 
-    def __init__(self, env_name, config, upload_dir="file:///tmp/ray"):
+    def __init__(self, env_name, config, upload_dir=None):
         """Initialize an RLLib algorithm.
 
         Args:
@@ -77,6 +77,7 @@ class Algorithm(object):
             should be placed. Can be local like file:///tmp/ray/ or on S3
             like s3://bucketname/.
         """
+        upload_dir = "file:///tmp/ray" if upload_dir is None else upload_dir
         self.experiment_id = uuid.uuid4()
         self.env_name = env_name
         self.config = config
diff --git a/python/ray/rllib/evolution_strategies/example.py b/python/ray/rllib/evolution_strategies/example.py
index e2ee67295..99fc24966 100755
--- a/python/ray/rllib/evolution_strategies/example.py
+++ b/python/ray/rllib/evolution_strategies/example.py
@@ -21,6 +21,8 @@ if __name__ == "__main__":
                         help="The stepsize to use.")
     parser.add_argument("--redis-address", default=None, type=str,
                         help="The Redis address of the cluster.")
+    parser.add_argument("--iterations", default=-1, type=int,
+                        help="The number of training iterations to run.")
 
     args = parser.parse_args()
     num_workers = args.num_workers
@@ -30,11 +32,13 @@ if __name__ == "__main__":
     ray.init(redis_address=args.redis_address,
              num_workers=(0 if args.redis_address is None else None))
 
-    config = DEFAULT_CONFIG._replace(
-        num_workers=num_workers,
-        stepsize=stepsize)
+    config = DEFAULT_CONFIG.copy()
+    config["num_workers"] = num_workers
+    config["stepsize"] = stepsize
 
     alg = EvolutionStrategies(env_name, config)
-    while True:
+    iteration = 0
+    while iteration != args.iterations:
+        iteration += 1
         result = alg.train()
         print("current status: {}".format(result))
diff --git a/python/ray/rllib/policy_gradient/example.py b/python/ray/rllib/policy_gradient/example.py
index fda9a1fe6..2ddf70188 100755
--- a/python/ray/rllib/policy_gradient/example.py
+++ b/python/ray/rllib/policy_gradient/example.py
@@ -21,12 +21,16 @@ if __name__ == "__main__":
                         help="Run the script inside of tf-dbg.")
     parser.add_argument("--load-checkpoint", default=None, type=str,
                         help="Continue training from a checkpoint.")
+    parser.add_argument("--iterations", default=None, type=int,
+                        help="The number of training iterations to run.")
 
     args = parser.parse_args()
     config = DEFAULT_CONFIG.copy()
     config["use_tf_debugger"] = args.use_tf_debugger
-    if args.load_checkpoint:
+    if args.load_checkpoint is not None:
         config["load_checkpoint"] = args.load_checkpoint
+    if args.iterations is not None:
+        config["max_iterations"] = args.iterations
 
     ray.init(redis_address=args.redis_address)
 
diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh
index 757c419cc..df5102901 100755
--- a/test/jenkins_tests/run_multi_node_tests.sh
+++ b/test/jenkins_tests/run_multi_node_tests.sh
@@ -8,7 +8,7 @@ set -x
 
 ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
 
-DOCKER_SHA=$($ROOT_DIR/../../build-docker.sh --output-sha --no-cache --skip-examples)
+DOCKER_SHA=$($ROOT_DIR/../../build-docker.sh --output-sha --no-cache)
 echo "Using Docker image" $DOCKER_SHA
 
 python $ROOT_DIR/multi_node_docker_test.py \
@@ -40,3 +40,32 @@ python $ROOT_DIR/multi_node_docker_test.py \
     --mem-size=60G \
     --shm-size=60G \
     --test-script=/ray/test/jenkins_tests/multi_node_tests/large_memory_test.py
+
+# Test that the example applications run.
+
+# docker run --shm-size=10G --memory=10G $DOCKER_SHA \
+#     python /ray/examples/lbfgs/driver.py
+
+# docker run --shm-size=10G --memory=10G $DOCKER_SHA \
+#     python /ray/examples/rl_pong/driver.py \
+#     --iterations=3
+
+# docker run --shm-size=10G --memory=10G $DOCKER_SHA \
+#     python /ray/examples/hyperopt/hyperopt_simple.py
+
+# docker run --shm-size=10G --memory=10G $DOCKER_SHA \
+#     python /ray/examples/hyperopt/hyperopt_adaptive.py
+
+docker run --shm-size=10G --memory=10G $DOCKER_SHA \
+    python /ray/python/ray/rllib/a3c/example.py \
+    --environment=PongDeterministic-v0 \
+    --iterations=2
+
+# docker run --shm-size=10G --memory=10G $DOCKER_SHA \
+#     python /ray/python/ray/rllib/policy_gradient/example.py \
+#     --iterations=2
+
+docker run --shm-size=10G --memory=10G $DOCKER_SHA \
+    python /ray/python/ray/rllib/evolution_strategies/example.py \
+    --env-name=Pendulum-v0 \
+    --iterations=2