From 6f1a29ad3f9b0fd6562a68519f0fcde9d4560e2d Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Sat, 2 Mar 2019 06:38:28 +0800 Subject: [PATCH] Consolidate CI Python tests and fix bug about multiple ray.init (#4195) --- .travis.yml | 55 ++----------------------- python/ray/remote_function.py | 11 ++++- python/ray/tests/test_object_manager.py | 5 +++ python/ray/worker.py | 4 ++ 4 files changed, 23 insertions(+), 52 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5f2f4e508..c5948741c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -158,12 +158,8 @@ script: # ray tune tests - python python/ray/tune/test/dependency_test.py - - python -m pytest -v --durations=10 python/ray/tune/test/trial_runner_test.py - - python -m pytest -v --durations=10 python/ray/tune/test/trial_scheduler_test.py - - python -m pytest -v --durations=10 python/ray/tune/test/experiment_test.py - - python -m pytest -v --durations=10 python/ray/tune/test/tune_server_test.py - - python -m pytest -v --durations=10 python/ray/tune/test/ray_trial_executor_test.py - - python -m pytest -v --durations=10 python/ray/tune/test/automl_searcher_test.py + # `cluster_tests.py` runs on Jenkins, not Travis. + - python -m pytest -v --durations=30 --ignore=python/ray/tune/cluster_tests.py python/ray/tune/test # ray rllib tests - python python/ray/rllib/test/test_catalog.py @@ -171,53 +167,10 @@ script: - python python/ray/rllib/test/test_optimizers.py - python python/ray/rllib/test/test_evaluators.py - # streaming library tests - - python -m pytest -v --durations=10 python/ray/tests/test_batched_queue.py - - python -m pytest -v --durations=10 python/ray/tests/test_logical_graph.py - + # ray tests # Python3.5+ only. Otherwise we will get `SyntaxError` regardless of how we set the tester. 
- python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=10 python/ray/experimental/test/async_test.py - - # ray tests - # TODO(williamma12): We cannot use pytests built-in test discovery because - # it causes a lot of the tests to fail on travis' apple builds even though - # it runs without issue on an apple build locally. - - python -m pytest -v --durations=10 python/ray/tests/test_global_state.py - - python -m pytest -v --durations=10 python/ray/tests/test_queue.py - - python -m pytest -v --durations=10 python/ray/tests/test_ray_init.py - - python -m pytest -v --durations=10 python/ray/tests/test_mini.py - - - python -m pytest -v --durations=10 python/ray/tests/test_basic.py - - python -m pytest -v --durations=10 python/ray/tests/test_array.py - - python -m pytest -v --durations=10 python/ray/tests/test_actor.py - - python -m pytest -v --durations=10 python/ray/tests/test_autoscaler.py - - python -m pytest -v --durations=10 python/ray/tests/test_tensorflow.py - - python -m pytest -v --durations=10 python/ray/tests/test_failure.py - - python -m pytest -v --durations=10 python/ray/tests/test_microbenchmarks.py - - python -m pytest -v --durations=10 python/ray/tests/test_stress.py - - python -m pytest -v --durations=10 python/ray/tests/test_component_failures.py - - python -m pytest -v --durations=10 python/ray/tests/test_multi_node.py - - python -m pytest -v --durations=10 python/ray/tests/test_multi_node_2.py - - python -m pytest -v --durations=10 python/ray/tests/test_recursion.py - - python -m pytest -v --durations=10 python/ray/tests/test_monitors.py - - python -m pytest -v --durations=10 python/ray/tests/test_cython.py - - python -m pytest -v --durations=10 python/ray/tests/test_credis.py - - python -m pytest -v --durations=10 python/ray/tests/test_node_manager.py - - python -m pytest -v --durations=10 python/ray/tests/test_signal.py - # TODO(yuhguo): object_manager_test.py requires a lot of CPU/memory, and - # better be put 
in Jenkins. However, it fails frequently in Jenkins, but - # works well in Travis. We should consider moving it back to Jenkins once - # we figure out the reason. - - python -m pytest -v --durations=10 python/ray/tests/test_object_manager.py - - # ray temp file tests - - python -m pytest -v --durations=10 python/ray/tests/test_tempfile.py - - # ray debug tools tests - - python -m pytest -v --durations=10 python/ray/tests/test_debug_tools.py - - # modin test files - - python -m pytest -v --durations=10 python/ray/tests/test_modin.py + - python -m pytest -v --durations=30 python/ray/tests deploy: - provider: s3 access_key_id: AKIAJ2L7XDUSZVTXI5QA diff --git a/python/ray/remote_function.py b/python/ray/remote_function.py index 144fbbc44..ff9978017 100644 --- a/python/ray/remote_function.py +++ b/python/ray/remote_function.py @@ -57,9 +57,11 @@ class RemoteFunction(object): self._function_signature = ray.signature.extract_signature( self._function) - # # Export the function. + # Export the function. worker = ray.worker.get_global_worker() worker.function_actor_manager.export(self) + # The session in which this function was last exported. + self._last_export_session = worker._session_index def __call__(self, *args, **kwargs): raise Exception("Remote functions cannot be called directly. Instead " @@ -97,6 +99,13 @@ class RemoteFunction(object): """An experimental alternate way to submit remote functions.""" worker = ray.worker.get_global_worker() worker.check_connected() + + if self._last_export_session < worker._session_index: + # If this function was exported in a previous session, we need to + # export it again, because the current GCS doesn't have it. 
+ self._last_export_session = worker._session_index + worker.function_actor_manager.export(self) + kwargs = {} if kwargs is None else kwargs args = ray.signature.extend_args(self._function_signature, args, kwargs) diff --git a/python/ray/tests/test_object_manager.py b/python/ray/tests/test_object_manager.py index 7c4ffa92f..1626312be 100644 --- a/python/ray/tests/test_object_manager.py +++ b/python/ray/tests/test_object_manager.py @@ -13,6 +13,11 @@ import warnings import ray from ray.tests.cluster_utils import Cluster +# TODO(yuhguo): This test file requires a lot of CPU/memory, and +# is better run in Jenkins. However, it fails frequently in Jenkins, but +# works well in Travis. We should consider moving it back to Jenkins once +# we figure out the reason. + if (multiprocessing.cpu_count() < 40 or ray.utils.get_system_memory() < 50 * 10**9): warnings.warn("This test must be run on large machines.") diff --git a/python/ray/worker.py b/python/ray/worker.py index ec20f9ab9..bb7406555 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -161,6 +161,9 @@ class Worker(object): # This event is checked regularly by all of the threads so that they # know when to exit. self.threads_stopped = threading.Event() + # Index of the current session. This number will + # increment every time `ray.shutdown` is called. + self._session_index = 0 @property def task_context(self): @@ -2064,6 +2067,7 @@ def disconnect(): if hasattr(worker, "logger_thread"): worker.logger_thread.join() worker.threads_stopped.clear() + worker._session_index += 1 worker.connected = False worker.cached_functions_to_run = []