From 80e8426b5ee33978e9986ce1911ec731e0f9559f Mon Sep 17 00:00:00 2001 From: Robert Nishihara Date: Sun, 16 Jul 2017 11:51:33 -0700 Subject: [PATCH] Test example applications and rllib in jenkins tests. (#707) * Test example applications in Jenkins. * Fix default upload_dir argument for Algorithm class. * Fix evolution strategies. * Comment out policy gradient example which doesn't seem to work. * Set --env-name for evolution strategies. --- docker/examples/Dockerfile | 2 +- examples/rl_pong/driver.py | 8 +++-- python/ray/rllib/a3c/example.py | 8 +++-- python/ray/rllib/common.py | 3 +- .../ray/rllib/evolution_strategies/example.py | 12 ++++--- python/ray/rllib/policy_gradient/example.py | 6 +++- test/jenkins_tests/run_multi_node_tests.sh | 31 ++++++++++++++++++- 7 files changed, 58 insertions(+), 12 deletions(-) diff --git a/docker/examples/Dockerfile b/docker/examples/Dockerfile index 854649f4b..84a273267 100644 --- a/docker/examples/Dockerfile +++ b/docker/examples/Dockerfile @@ -3,4 +3,4 @@ FROM ray-project/deploy RUN conda install -y -c conda-forge tensorflow RUN apt-get install -y zlib1g-dev -RUN pip install gym[atari] +RUN pip install gym[atari] opencv-python smart_open diff --git a/examples/rl_pong/driver.py b/examples/rl_pong/driver.py index 1e4f9db52..5bf784092 100644 --- a/examples/rl_pong/driver.py +++ b/examples/rl_pong/driver.py @@ -150,7 +150,9 @@ if __name__ == "__main__": help="The number of rollouts to do per batch.") parser.add_argument("--redis-address", default=None, type=str, help="The Redis address of the cluster.") - + parser.add_argument("--iterations", default=-1, type=int, + help="The number of model updates to perform. By " + "default, training will not terminate.") args = parser.parse_args() batch_size = args.batch_size @@ -169,7 +171,9 @@ if __name__ == "__main__": # Update the rmsprop memory. rmsprop_cache = {k: np.zeros_like(v) for k, v in model.items()} actors = [PongEnv.remote() for _ in range(batch_size)] - while True: + iteration = 0 + while iteration != args.iterations: + iteration += 1 model_id = ray.put(model) actions = [] # Launch tasks to compute gradients from multiple rollouts in parallel. diff --git a/python/ray/rllib/a3c/example.py b/python/ray/rllib/a3c/example.py index 47fc3d777..de66693e1 100755 --- a/python/ray/rllib/a3c/example.py +++ b/python/ray/rllib/a3c/example.py @@ -17,7 +17,9 @@ if __name__ == "__main__": parser.add_argument("--redis-address", default=None, type=str, help="The Redis address of the cluster.") parser.add_argument("--num-workers", default=4, type=int, - help="The number of A3C workers to use>") + help="The number of A3C workers to use.") + parser.add_argument("--iterations", default=-1, type=int, + help="The number of training iterations to run.") args = parser.parse_args() ray.init(redis_address=args.redis_address, num_cpus=args.num_workers) @@ -27,6 +29,8 @@ if __name__ == "__main__": a3c = A3C(args.environment, config) - while True: + iteration = 0 + while iteration != args.iterations: + iteration += 1 res = a3c.train() print("current status: {}".format(res)) diff --git a/python/ray/rllib/common.py b/python/ray/rllib/common.py index 3b01a7de8..de0c57d0a 100644 --- a/python/ray/rllib/common.py +++ b/python/ray/rllib/common.py @@ -67,7 +67,7 @@ class Algorithm(object): TODO(ekl): support checkpoint / restore of training state. """ - def __init__(self, env_name, config, upload_dir="file:///tmp/ray"): + def __init__(self, env_name, config, upload_dir=None): """Initialize an RLLib algorithm. Args: @@ -77,6 +77,7 @@ class Algorithm(object): should be placed. Can be local like file:///tmp/ray/ or on S3 like s3://bucketname/. """ + upload_dir = "file:///tmp/ray" if upload_dir is None else upload_dir self.experiment_id = uuid.uuid4() self.env_name = env_name self.config = config diff --git a/python/ray/rllib/evolution_strategies/example.py b/python/ray/rllib/evolution_strategies/example.py index e2ee67295..99fc24966 100755 --- a/python/ray/rllib/evolution_strategies/example.py +++ b/python/ray/rllib/evolution_strategies/example.py @@ -21,6 +21,8 @@ if __name__ == "__main__": help="The stepsize to use.") parser.add_argument("--redis-address", default=None, type=str, help="The Redis address of the cluster.") + parser.add_argument("--iterations", default=-1, type=int, + help="The number of training iterations to run.") args = parser.parse_args() num_workers = args.num_workers @@ -30,11 +32,13 @@ if __name__ == "__main__": ray.init(redis_address=args.redis_address, num_workers=(0 if args.redis_address is None else None)) - config = DEFAULT_CONFIG._replace( - num_workers=num_workers, - stepsize=stepsize) + config = DEFAULT_CONFIG.copy() + config["num_workers"] = num_workers + config["stepsize"] = stepsize alg = EvolutionStrategies(env_name, config) - while True: + iteration = 0 + while iteration != args.iterations: + iteration += 1 result = alg.train() print("current status: {}".format(result)) diff --git a/python/ray/rllib/policy_gradient/example.py b/python/ray/rllib/policy_gradient/example.py index fda9a1fe6..2ddf70188 100755 --- a/python/ray/rllib/policy_gradient/example.py +++ b/python/ray/rllib/policy_gradient/example.py @@ -21,12 +21,16 @@ if __name__ == "__main__": help="Run the script inside of tf-dbg.") parser.add_argument("--load-checkpoint", default=None, type=str, help="Continue training from a checkpoint.") + parser.add_argument("--iterations", default=None, type=int, + help="The number of training iterations to run.") args = parser.parse_args() config = DEFAULT_CONFIG.copy() config["use_tf_debugger"] = args.use_tf_debugger - if args.load_checkpoint: + if args.load_checkpoint is not None: config["load_checkpoint"] = args.load_checkpoint + if args.iterations is not None: + config["max_iterations"] = args.iterations ray.init(redis_address=args.redis_address) diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index 757c419cc..df5102901 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -8,7 +8,7 @@ set -x ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) -DOCKER_SHA=$($ROOT_DIR/../../build-docker.sh --output-sha --no-cache --skip-examples) +DOCKER_SHA=$($ROOT_DIR/../../build-docker.sh --output-sha --no-cache) echo "Using Docker image" $DOCKER_SHA python $ROOT_DIR/multi_node_docker_test.py \ @@ -40,3 +40,32 @@ python $ROOT_DIR/multi_node_docker_test.py \ --mem-size=60G \ --shm-size=60G \ --test-script=/ray/test/jenkins_tests/multi_node_tests/large_memory_test.py + +# Test that the example applications run. + +# docker run --shm-size=10G --memory=10G $DOCKER_SHA \ +# python /ray/examples/lbfgs/driver.py + +# docker run --shm-size=10G --memory=10G $DOCKER_SHA \ +# python /ray/examples/rl_pong/driver.py \ +# --iterations=3 + +# docker run --shm-size=10G --memory=10G $DOCKER_SHA \ +# python /ray/examples/hyperopt/hyperopt_simple.py + +# docker run --shm-size=10G --memory=10G $DOCKER_SHA \ +# python /ray/examples/hyperopt/hyperopt_adaptive.py + +docker run --shm-size=10G --memory=10G $DOCKER_SHA \ + python /ray/python/ray/rllib/a3c/example.py \ + --environment=PongDeterministic-v0 \ + --iterations=2 + +# docker run --shm-size=10G --memory=10G $DOCKER_SHA \ +# python /ray/python/ray/rllib/policy_gradient/example.py \ +# --iterations=2 + +docker run --shm-size=10G --memory=10G $DOCKER_SHA \ + python /ray/python/ray/rllib/evolution_strategies/example.py \ + --env-name=Pendulum-v0 \ + --iterations=2