[core] Add Recursive task cancelation (#11923)

This commit is contained in:
Ian Rodney
2020-11-18 15:18:40 -08:00
committed by GitHub
parent e2a147d5fb
commit e086ddc18f
12 changed files with 118 additions and 23 deletions
+3 -2
View File
@@ -1170,13 +1170,14 @@ cdef class CoreWorker:
check_status(CCoreWorkerProcess.GetCoreWorker().KillActor(
c_actor_id, True, no_restart))
def cancel_task(self, ObjectRef object_ref, c_bool force_kill):
def cancel_task(self, ObjectRef object_ref, c_bool force_kill,
c_bool recursive):
cdef:
CObjectID c_object_id = object_ref.native()
CRayStatus status = CRayStatus.OK()
status = CCoreWorkerProcess.GetCoreWorker().CancelTask(
c_object_id, force_kill)
c_object_id, force_kill, recursive)
if not status.ok():
raise TypeError(status.message().decode())
+2 -1
View File
@@ -110,7 +110,8 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
CRayStatus KillActor(
const CActorID &actor_id, c_bool force_kill,
c_bool no_restart)
CRayStatus CancelTask(const CObjectID &object_id, c_bool force_kill)
CRayStatus CancelTask(const CObjectID &object_id, c_bool force_kill,
c_bool recursive)
unique_ptr[CProfileEvent] CreateProfileEvent(
const c_string &event_type)
+32
View File
@@ -258,5 +258,37 @@ def test_remote_cancel(ray_start_regular, use_force):
ray.get(inner, timeout=10)
@pytest.mark.parametrize("use_force", [True, False])
def test_recursive_cancel(shutdown_only, use_force):
    """Check that cancelling a task also cancels the tasks it submitted.

    The cluster is started with 4 CPUs. ``outer`` holds one CPU and spawns
    ``inner``, which holds another; both loop forever. ``many_resources``
    asks for all 4 CPUs, so it cannot be scheduled while either of them is
    still running. If cancelling ``outer`` did not propagate to ``inner``,
    ``many_resources`` would never get its resources and the final
    ``ray.get`` would time out.

    Args:
        shutdown_only: Fixture requested for cleanup (its body is not
            visible here -- presumably it shuts Ray down after the test).
        use_force: Forwarded to ``ray.cancel(force=...)`` so that both the
            regular and the force-kill cancellation paths are exercised.
    """
    ray.init(num_cpus=4)

    @ray.remote(num_cpus=1)
    def inner():
        # Never returns on its own; it only stops if it is cancelled.
        while True:
            time.sleep(0.1)

    @ray.remote(num_cpus=1)
    def outer():
        # Keep the child's ref in a local so it stays referenced while
        # this task spins.
        x = [inner.remote()]
        print(x)
        while True:
            time.sleep(0.1)

    @ray.remote(num_cpus=4)
    def many_resources():
        return 300

    outer_fut = outer.remote()
    many_fut = many_resources.remote()

    # While outer/inner occupy CPUs, the 4-CPU task must stay pending.
    with pytest.raises(GetTimeoutError):
        ray.get(many_fut, timeout=1)

    # Pass the parametrized flag through: previously ``use_force`` was
    # ignored here, so both parametrizations ran the same non-forced cancel.
    ray.cancel(outer_fut, force=use_force, recursive=True)
    with pytest.raises(valid_exceptions(use_force)):
        ray.get(outer_fut, timeout=10)

    # Only succeeds once the CPUs held by outer and inner are released.
    assert ray.get(many_fut, timeout=30) == 300
# Allow running this test module directly: hand the file to pytest in
# verbose mode and propagate pytest's status as the process exit code.
if __name__ == "__main__":
    exit_status = pytest.main(["-v", __file__])
    sys.exit(exit_status)
+4 -2
View File
@@ -1575,7 +1575,7 @@ def kill(actor, *, no_restart=True):
worker.core_worker.kill_actor(actor._ray_actor_id, no_restart)
def cancel(object_ref, *, force=False):
def cancel(object_ref, *, force=False, recursive=True):
"""Cancels a task according to the following conditions.
If the specified task is pending execution, it will not be executed. If
@@ -1595,6 +1595,8 @@ def cancel(object_ref, *, force=False):
that should be canceled.
force (boolean): Whether to force-kill a running task by killing
the worker that is running the task.
recursive (boolean): Whether to try to cancel tasks submitted by the
task specified.
Raises:
TypeError: This is also raised for actor tasks.
"""
@@ -1605,7 +1607,7 @@ def cancel(object_ref, *, force=False):
raise TypeError(
"ray.cancel() only supported for non-actor object refs. "
f"Got: {type(object_ref)}.")
return worker.core_worker.cancel_task(object_ref, force)
return worker.core_worker.cancel_task(object_ref, force, recursive)
def _mode(worker=global_worker):