mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 18:29:08 +08:00
e69664b74b
* Windows compatibility bug fixes * Use WSASend/WSARecv as WSASendMsg/WSARecvMsg do not work with TCP sockets * Clean up some TODOs * Fix duplicate compilations * RedisAsioClient boost::asio::error::connection_reset Co-authored-by: Mehrdad <noreply@github.com>
49 lines
1.7 KiB
Python
49 lines
1.7 KiB
Python
import os
|
|
import signal
|
|
import sys
|
|
import time
|
|
"""
|
|
This is a lightweight "reaper" process used to ensure that ray processes are
cleaned up properly when the main ray process dies unexpectedly (e.g.,
segfaults or gets SIGKILLed). Note that processes may not be cleaned up
properly if this process is SIGTERMed or SIGKILLed.

It detects that its parent has died by reading from stdin, which must be
inherited from the parent process so that the OS will deliver an EOF if the
parent dies. When this happens, the reaper process kills the rest of its
process group (first attempting graceful shutdown with SIGTERM, then escalating
to SIGKILL).
|
|
"""
|
|
|
|
# Seconds to wait after SIGTERMing the process group before escalating to
# SIGKILL (used by the SIGTERM handler in reap_process_group).
SIGTERM_GRACE_PERIOD_SECONDS = 1
|
|
|
|
|
|
def reap_process_group(*args):
    """Shut down every process in our process group, ourselves included.

    The group is first sent SIGTERM. A handler installed just beforehand
    catches the copy of that signal delivered to this process, sleeps for
    SIGTERM_GRACE_PERIOD_SECONDS, and then sends SIGKILL to the group.

    Any positional arguments are accepted and ignored.
    """
    # Passing 0 as the pgid targets the caller's own process group.
    own_group = 0

    def escalate_to_sigkill(*_signal_args):
        # Leave the other processes a short window to clean up after the
        # SIGTERM before forcing the issue.
        time.sleep(SIGTERM_GRACE_PERIOD_SECONDS)
        # Last resort: SIGKILL the whole group, which includes this process.
        os.killpg(own_group, signal.SIGKILL)

    # The handler must be in place before the SIGTERM below is sent, because
    # that signal is delivered to this process as well.
    signal.signal(signal.SIGTERM, escalate_to_sigkill)

    # Getting here means the parent died: ask the group (and ourselves) to
    # terminate gracefully.
    os.killpg(own_group, signal.SIGTERM)
|
|
|
|
|
|
def main():
    """Wait for the parent process to die, then reap the process group.

    Blocks reading from stdin until an EOF is seen, then calls
    reap_process_group().
    """
    # Read from stdin forever. Because stdin is a file descriptor inherited
    # from our parent process, we will get an EOF if the parent dies, which
    # is signaled by an empty return from read().
    # We intentionally don't set any signal handlers here, so a SIGTERM from
    # the parent can be used to kill this process gracefully without it
    # killing the rest of the process group.
    while sys.stdin.read():
        pass
    reap_process_group()
|
|
|
|
|
|
if __name__ == "__main__":
    # Only start watching stdin when executed as a script, not on import.
    main()
|