Use prctl(PR_SET_PDEATHSIG) on Linux instead of reaper (#7150)

This commit is contained in:
mehrdadn
2020-03-03 09:45:42 -08:00
committed by GitHub
parent f5b1062ed9
commit 4d42664b2a
4 changed files with 244 additions and 46 deletions
+144
View File
@@ -6,6 +6,7 @@ import logging
import numpy as np
import os
import six
import subprocess
import sys
import threading
import time
@@ -18,6 +19,15 @@ import psutil
logger = logging.getLogger(__name__)
# Linux can bind child processes' lifetimes to that of their parents via prctl.
# prctl support is detected dynamically once, and assumed thereafter.
linux_prctl = None
# Windows can bind processes' lifetimes to that of kernel-level "job objects".
# We keep a global job object to tie its lifetime to that of our own process.
win32_job = None
win32_AssignProcessToJobObject = None
def _random_string():
id_hash = hashlib.sha1()
@@ -496,6 +506,140 @@ def is_main_thread():
return threading.current_thread().getName() == "MainThread"
def detect_fate_sharing_support_win32():
global win32_job, win32_AssignProcessToJobObject
if win32_job is None and sys.platform == "win32":
import ctypes
try:
from ctypes.wintypes import BOOL, DWORD, HANDLE, LPVOID, LPCWSTR
kernel32 = ctypes.WinDLL("kernel32")
kernel32.CreateJobObjectW.argtypes = (LPVOID, LPCWSTR)
kernel32.CreateJobObjectW.restype = HANDLE
sijo_argtypes = (HANDLE, ctypes.c_int, LPVOID, DWORD)
kernel32.SetInformationJobObject.argtypes = sijo_argtypes
kernel32.SetInformationJobObject.restype = BOOL
kernel32.AssignProcessToJobObject.argtypes = (HANDLE, HANDLE)
kernel32.AssignProcessToJobObject.restype = BOOL
except (AttributeError, TypeError, ImportError):
kernel32 = None
job = kernel32.CreateJobObjectW(None, None) if kernel32 else None
job = subprocess.Handle(job) if job else job
if job:
from ctypes.wintypes import DWORD, LARGE_INTEGER, ULARGE_INTEGER
class JOBOBJECT_BASIC_LIMIT_INFORMATION(ctypes.Structure):
_fields_ = [
("PerProcessUserTimeLimit", LARGE_INTEGER),
("PerJobUserTimeLimit", LARGE_INTEGER),
("LimitFlags", DWORD),
("MinimumWorkingSetSize", ctypes.c_size_t),
("MaximumWorkingSetSize", ctypes.c_size_t),
("ActiveProcessLimit", DWORD),
("Affinity", ctypes.c_size_t),
("PriorityClass", DWORD),
("SchedulingClass", DWORD),
]
class IO_COUNTERS(ctypes.Structure):
_fields_ = [
("ReadOperationCount", ULARGE_INTEGER),
("WriteOperationCount", ULARGE_INTEGER),
("OtherOperationCount", ULARGE_INTEGER),
("ReadTransferCount", ULARGE_INTEGER),
("WriteTransferCount", ULARGE_INTEGER),
("OtherTransferCount", ULARGE_INTEGER),
]
class JOBOBJECT_EXTENDED_LIMIT_INFORMATION(ctypes.Structure):
_fields_ = [
("BasicLimitInformation",
JOBOBJECT_BASIC_LIMIT_INFORMATION),
("IoInfo", IO_COUNTERS),
("ProcessMemoryLimit", ctypes.c_size_t),
("JobMemoryLimit", ctypes.c_size_t),
("PeakProcessMemoryUsed", ctypes.c_size_t),
("PeakJobMemoryUsed", ctypes.c_size_t),
]
# Defined in <WinNT.h>; also available here:
# https://docs.microsoft.com/en-us/windows/win32/api/jobapi2/nf-jobapi2-setinformationjobobject
JobObjectExtendedLimitInformation = 9
JOB_OBJECT_LIMIT_BREAKAWAY_OK = 0x00000800
JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE = 0x00002000
buf = JOBOBJECT_EXTENDED_LIMIT_INFORMATION()
buf.BasicLimitInformation.LimitFlags = (
JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE
| JOB_OBJECT_LIMIT_BREAKAWAY_OK)
infoclass = JobObjectExtendedLimitInformation
if not kernel32.SetInformationJobObject(
job, infoclass, ctypes.byref(buf), ctypes.sizeof(buf)):
job = None
win32_AssignProcessToJobObject = (kernel32.AssignProcessToJobObject
if kernel32 is not None else False)
win32_job = job if job else False
return bool(win32_job)
def detect_fate_sharing_support_linux():
global linux_prctl
if linux_prctl is None and sys.platform.startswith("linux"):
try:
from ctypes import c_int, c_ulong, CDLL
prctl = CDLL(None).prctl
prctl.restype = c_int
prctl.argtypes = [c_int, c_ulong, c_ulong, c_ulong, c_ulong]
except (AttributeError, TypeError):
prctl = None
linux_prctl = prctl if prctl else False
return bool(linux_prctl)
def detect_fate_sharing_support():
result = None
if sys.platform == "win32":
result = detect_fate_sharing_support_win32()
elif sys.platform.startswith("linux"):
result = detect_fate_sharing_support_linux()
return result
def set_kill_on_parent_death_linux():
"""Ensures this process dies if its parent dies (fate-sharing).
Linux-only. Must be called in preexec_fn (i.e. by the child).
"""
if detect_fate_sharing_support_linux():
import signal
PR_SET_PDEATHSIG = 1
if linux_prctl(PR_SET_PDEATHSIG, signal.SIGKILL, 0, 0, 0) != 0:
import ctypes
raise OSError(ctypes.get_errno(), "prctl(PR_SET_PDEATHSIG) failed")
else:
assert False, "PR_SET_PDEATHSIG used despite being unavailable"
def set_kill_child_on_death_win32(child_proc):
"""Ensures the child process dies if this process dies (fate-sharing).
Windows-only. Must be called by the parent, after spawning the child.
Args:
child_proc: The subprocess.Popen or subprocess.Handle object.
"""
if isinstance(child_proc, subprocess.Popen):
child_proc = child_proc._handle
assert isinstance(child_proc, subprocess.Handle)
if detect_fate_sharing_support_win32():
if not win32_AssignProcessToJobObject(win32_job, int(child_proc)):
import ctypes
raise OSError(ctypes.get_last_error(),
"AssignProcessToJobObject() failed")
else:
assert False, "AssignProcessToJobObject used despite being unavailable"
def try_make_directory_shared(directory_path):
try:
os.chmod(directory_path, 0o0777)