[flaky test] Fix test_calling_start_ray_head (#5644)

This commit is contained in:
Edward Oakes
2019-09-14 22:27:45 -07:00
committed by Philipp Moritz
parent 74a34b736d
commit a8888c5ff4
4 changed files with 106 additions and 55 deletions
+50 -6
View File
@@ -61,9 +61,12 @@ class Node(object):
connect_only (bool): If true, connect to the node without starting
new processes.
"""
if shutdown_at_exit and connect_only:
raise ValueError("'shutdown_at_exit' and 'connect_only' cannot "
"be both true.")
if shutdown_at_exit:
if connect_only:
raise ValueError("'shutdown_at_exit' and 'connect_only' "
"cannot both be true.")
self._register_shutdown_hooks()
self.head = head
self.all_processes = {}
@@ -152,9 +155,50 @@ class Node(object):
if not connect_only:
self.start_ray_processes()
if shutdown_at_exit:
atexit.register(lambda: self.kill_all_processes(
check_alive=False, allow_graceful=True))
def _register_shutdown_hooks(self):
# Make ourselves a process group session leader to ensure we can clean
# up child processes later without killing a process that started us.
try:
os.setpgrp()
except OSError as e:
logger.warning("setpgrp failed, processes may not be "
"cleaned up properly: {}.".format(e))
# Clean up child process by first going through the normal
# kill_all_processes procedure (which should clean them all up
# under normal circumstances), then sending a SIGTERM to our
# process group to take care of any children that may have been
# spawned but not yet added to the list.
def clean_up_children(sigterm_handler):
self.kill_all_processes(check_alive=False, allow_graceful=True)
signal.signal(signal.SIGTERM, sigterm_handler)
try:
# SIGTERM our process group as a last resort in case there
# were processes that we spawned but didn't add to the list
# (could happen if interrupted just after spawning them).
# We could send SIGKILL here to be sure, but we're also
# sending it to ourselves.
os.killpg(0, signal.SIGTERM)
except OSError as e:
print("killpg failed, processes may not have "
"been cleaned up properly: {}.".format(e))
# Register the a handler to be called during the normal python
# shutdown process. We pass an empty lambda to clean_up_children
# because after cleaning up the child processes, it should do
# nothing and return so that the shutdown process can continue.
def atexit_handler():
return clean_up_children(lambda *args, **kwargs: None)
atexit.register(atexit_handler)
# Register the a handler to be called if we get a SIGTERM.
# In this case, we want to exit with an error code (1) after
# cleaning up child processes.
def sigterm_handler():
return clean_up_children(lambda *args, **kwargs: sys.exit(1))
signal.signal(signal.SIGTERM, sigterm_handler)
def _init_temp(self, redis_client):
# Create an dictionary to store temp file index.