From e94bebb1de7d66e3bc367106b63c3f4b30ada3f7 Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Fri, 1 Nov 2019 16:42:04 -0700 Subject: [PATCH] [tune] Fix Jenkins tests (#6028) --- ci/jenkins_tests/run_tune_tests.sh | 2 +- docker/examples/Dockerfile | 3 ++- docker/tune_test/Dockerfile | 3 ++- python/ray/tune/tests/test_cluster.py | 6 +++++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ci/jenkins_tests/run_tune_tests.sh b/ci/jenkins_tests/run_tune_tests.sh index 3250a4884..0d024a0cd 100755 --- a/ci/jenkins_tests/run_tune_tests.sh +++ b/ci/jenkins_tests/run_tune_tests.sh @@ -55,7 +55,7 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ bash -c 'pip install -U tensorflow==1.12 && python /ray/python/ray/tune/tests/test_logger.py' -$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ +$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} -e MPLBACKEND=Agg $DOCKER_SHA \ python /ray/python/ray/tune/tests/tutorial.py $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ diff --git a/docker/examples/Dockerfile b/docker/examples/Dockerfile index a07e10b6e..fc8da7660 100644 --- a/docker/examples/Dockerfile +++ b/docker/examples/Dockerfile @@ -4,7 +4,8 @@ FROM ray-project/deploy # This updates numpy to 1.14 and mutes errors from other libraries RUN conda install -y numpy -RUN apt-get install -y zlib1g-dev +# Needed to run Tune example with a 'plot' call - which does not actually render a plot, but throws an error. +RUN apt-get install -y zlib1g-dev libgl1-mesa-dev # The following is needed to support TensorFlow 1.14 RUN conda remove -y --force wrapt RUN pip install -U pip diff --git a/docker/tune_test/Dockerfile b/docker/tune_test/Dockerfile index a6cb26f0f..a3e702d77 100644 --- a/docker/tune_test/Dockerfile +++ b/docker/tune_test/Dockerfile @@ -9,7 +9,8 @@ RUN pip install -U pip RUN pip install -U https://ray-wheels.s3-us-west-2.amazonaws.com/latest/ray-0.8.0.dev6-cp36-cp36m-manylinux1_x86_64.whl || pip install -U https://ray-wheels.s3-us-west-2.amazonaws.com/latest/ray-0.8.0.dev5-cp36-cp36m-manylinux1_x86_64.whl RUN pip install -U boto3 # We install this after the latest wheels -- this should not override the latest wheels. -RUN apt-get install -y zlib1g-dev +# Needed to run Tune example with a 'plot' call - which does not actually render a plot, but throws an error. +RUN apt-get install -y zlib1g-dev libgl1-mesa-dev # The following is needed to support TensorFlow 1.14 RUN conda remove -y --force wrapt RUN pip install gym[atari]==0.10.11 opencv-python-headless tensorflow lz4 keras pytest-timeout smart_open torch torchvision diff --git a/python/ray/tune/tests/test_cluster.py b/python/ray/tune/tests/test_cluster.py index a6ff605d9..3dc2fd2b0 100644 --- a/python/ray/tune/tests/test_cluster.py +++ b/python/ray/tune/tests/test_cluster.py @@ -370,8 +370,8 @@ def test_cluster_down_simple(start_connected_cluster, tmpdir): assert all(t.status == Trial.RUNNING for t in runner.get_trials()) runner.checkpoint() - cluster.shutdown() ray.shutdown() + cluster.shutdown() cluster = _start_new_cluster() runner = TrialRunner(resume="LOCAL", local_checkpoint_dir=dirpath) @@ -385,6 +385,7 @@ def test_cluster_down_simple(start_connected_cluster, tmpdir): runner.step() assert all(t.status == Trial.TERMINATED for t in runner.get_trials()) + ray.shutdown() cluster.shutdown() @@ -425,6 +426,7 @@ def test_cluster_down_full(start_connected_cluster, tmpdir): all_experiments, resume=True, raise_on_failed_trial=False) assert len(trials) == 4 assert all(t.status in [Trial.TERMINATED, Trial.ERROR] for t in trials) + ray.shutdown() cluster.shutdown() @@ -487,6 +489,7 @@ tune.run( }, resume=True) assert all(t.status == Trial.TERMINATED for t in trials2) + ray.shutdown() cluster.shutdown() @@ -588,4 +591,5 @@ tune.run( raise_on_failed_trial=False) assert all(t.status == Trial.TERMINATED for t in trials2) assert {t.trial_id for t in trials2} == {t.trial_id for t in trials} + ray.shutdown() cluster.shutdown()