diff --git a/.travis/install-dependencies.sh b/.travis/install-dependencies.sh
index d3662c157..293c1b8b6 100755
--- a/.travis/install-dependencies.sh
+++ b/.travis/install-dependencies.sh
@@ -25,7 +25,7 @@ if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then
   bash miniconda.sh -b -p $HOME/miniconda
   export PATH="$HOME/miniconda/bin:$PATH"
   pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.22 requests \
-    feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler
+    feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout
 elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
   sudo apt-get update
   sudo apt-get install -y cmake pkg-config python-dev python-numpy build-essential autoconf curl libtool unzip
@@ -34,7 +34,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
   bash miniconda.sh -b -p $HOME/miniconda
   export PATH="$HOME/miniconda/bin:$PATH"
   pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.22 requests \
-    feather-format lxml openpyxl xlrd py-spy setproctitle
+    feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout
 elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
   # check that brew is installed
   which -s brew
@@ -51,7 +51,7 @@ elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
   bash miniconda.sh -b -p $HOME/miniconda
   export PATH="$HOME/miniconda/bin:$PATH"
   pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.22 requests \
-    feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler
+    feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout
 elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
   # check that brew is installed
   which -s brew
@@ -68,7 +68,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
   bash miniconda.sh -b -p $HOME/miniconda
   export PATH="$HOME/miniconda/bin:$PATH"
   pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.22 requests \
-    feather-format lxml openpyxl xlrd py-spy setproctitle
+    feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout
 elif [[ "$LINT" == "1" ]]; then
   sudo apt-get update
   sudo apt-get install -y cmake build-essential autoconf curl libtool unzip
diff --git a/python/ray/test/cluster_utils.py b/python/ray/test/cluster_utils.py
index 1c21ef067..afaf5dd21 100644
--- a/python/ray/test/cluster_utils.py
+++ b/python/ray/test/cluster_utils.py
@@ -118,12 +118,17 @@ class Cluster(object):
 
         Args:
             retries (int): Number of times to retry checking client table.
+
+        Returns:
+            True if successfully registered nodes as expected.
         """
+
         for i in range(retries):
             if not ray.is_initialized() or not self._check_registered_nodes():
                 time.sleep(0.1)
             else:
-                break
+                return True
+        return False
 
     def _check_registered_nodes(self):
         registered = len([
diff --git a/python/ray/test/test_global_state.py b/python/ray/test/test_global_state.py
index c5501dc9c..fc6aa0bc5 100644
--- a/python/ray/test/test_global_state.py
+++ b/python/ray/test/test_global_state.py
@@ -2,10 +2,16 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import json
 import pytest
+try:
+    import pytest_timeout
+except ImportError:
+    pytest_timeout = None
 import time
 
 import ray
+from ray.test.cluster_utils import Cluster
 
 
 @pytest.fixture
@@ -17,6 +23,28 @@ def ray_start():
     ray.shutdown()
 
 
+@pytest.fixture
+def cluster_start():
+    # Start the Ray processes.
+    cluster = Cluster(
+        initialize_head=True,
+        connect=True,
+        head_node_args={
+            "resources": dict(CPU=1),
+            "_internal_config": json.dumps({
+                "num_heartbeats_timeout": 10
+            })
+        })
+    yield cluster
+    ray.shutdown()
+    cluster.shutdown()
+
+
+# TODO(rliaw): The proper way to do this is to have the pytest config setup.
+@pytest.mark.skipif(
+    pytest_timeout is None,
+    reason="Timeout package not installed; skipping test that may hang.")
+@pytest.mark.timeout(10)
 def test_replenish_resources(ray_start):
     cluster_resources = ray.global_state.cluster_resources()
     available_resources = ray.global_state.available_resources()
@@ -27,17 +55,18 @@ def test_replenish_resources(ray_start):
         pass
 
     ray.get(cpu_task.remote())
-    start = time.time()
     resources_reset = False
 
-    timeout = 10
-    while not resources_reset and time.time() - start < timeout:
+    while not resources_reset:
         available_resources = ray.global_state.available_resources()
         resources_reset = (cluster_resources == available_resources)
-
     assert resources_reset
 
 
+@pytest.mark.skipif(
+    pytest_timeout is None,
+    reason="Timeout package not installed; skipping test that may hang.")
+@pytest.mark.timeout(10)
 def test_uses_resources(ray_start):
     cluster_resources = ray.global_state.cluster_resources()
 
@@ -48,11 +77,32 @@ def test_uses_resources(ray_start):
     cpu_task.remote()
     resource_used = False
 
-    start = time.time()
-    timeout = 10
-    while not resource_used and time.time() - start < timeout:
+    while not resource_used:
         available_resources = ray.global_state.available_resources()
         resource_used = available_resources[
             "CPU"] == cluster_resources["CPU"] - 1
 
     assert resource_used
+
+
+@pytest.mark.skipif(
+    pytest_timeout is None,
+    reason="Timeout package not installed; skipping test that may hang.")
+@pytest.mark.timeout(20)
+def test_add_remove_cluster_resources(cluster_start):
+    """Tests that Global State API is consistent with actual cluster."""
+    cluster = cluster_start
+    assert ray.global_state.cluster_resources()["CPU"] == 1
+    nodes = []
+    nodes += [cluster.add_node(resources=dict(CPU=1))]
+    assert cluster.wait_for_nodes()
+    assert ray.global_state.cluster_resources()["CPU"] == 2
+
+    cluster.remove_node(nodes.pop())
+    assert cluster.wait_for_nodes()
+    assert ray.global_state.cluster_resources()["CPU"] == 1
+
+    for i in range(5):
+        nodes += [cluster.add_node(resources=dict(CPU=1))]
+    assert cluster.wait_for_nodes()
+    assert ray.global_state.cluster_resources()["CPU"] == 6