mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 17:02:43 +08:00
[core] Add Recursive task cancelation (#11923)
This commit is contained in:
@@ -1170,13 +1170,14 @@ cdef class CoreWorker:
|
||||
check_status(CCoreWorkerProcess.GetCoreWorker().KillActor(
|
||||
c_actor_id, True, no_restart))
|
||||
|
||||
def cancel_task(self, ObjectRef object_ref, c_bool force_kill,
                c_bool recursive):
    """Ask the core worker to cancel the task that produces ``object_ref``.

    Args:
        object_ref: Reference to the object returned by the task to cancel.
        force_kill: Forwarded unchanged to the core worker's ``CancelTask``.
        recursive: Forwarded unchanged to the core worker's ``CancelTask``.

    Raises:
        TypeError: If the core worker reports a non-OK status; the exception
            message is the decoded status message.
    """
    cdef:
        CObjectID c_object_id = object_ref.native()
        CRayStatus status = CRayStatus.OK()

    # Single call using the three-argument CancelTask signature; the older
    # two-argument form is superseded and must not appear alongside it.
    status = CCoreWorkerProcess.GetCoreWorker().CancelTask(
        c_object_id, force_kill, recursive)

    if not status.ok():
        raise TypeError(status.message().decode())
|
||||
|
||||
@@ -110,7 +110,8 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
|
||||
CRayStatus KillActor(
|
||||
const CActorID &actor_id, c_bool force_kill,
|
||||
c_bool no_restart)
|
||||
CRayStatus CancelTask(const CObjectID &object_id, c_bool force_kill)
|
||||
CRayStatus CancelTask(const CObjectID &object_id, c_bool force_kill,
|
||||
c_bool recursive)
|
||||
|
||||
unique_ptr[CProfileEvent] CreateProfileEvent(
|
||||
const c_string &event_type)
|
||||
|
||||
@@ -258,5 +258,37 @@ def test_remote_cancel(ray_start_regular, use_force):
|
||||
ray.get(inner, timeout=10)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_force", [True, False])
def test_recursive_cancel(shutdown_only, use_force):
    """Cancelling a task also cancels the tasks it submitted.

    ``outer`` and the ``inner`` task it launches each hold one of the four
    CPUs and never return, so ``many_resources`` (which needs all four)
    cannot be scheduled until both are gone. After ``outer`` is cancelled,
    ``many_resources`` completing shows that ``inner`` was released as well.
    """
    ray.init(num_cpus=4)

    @ray.remote(num_cpus=1)
    def inner():
        while True:
            time.sleep(0.1)

    @ray.remote(num_cpus=1)
    def outer():
        x = [inner.remote()]
        print(x)
        while True:
            time.sleep(0.1)

    @ray.remote(num_cpus=4)
    def many_resources():
        return 300

    outer_fut = outer.remote()
    many_fut = many_resources.remote()

    # While outer/inner occupy CPUs, the 4-CPU task must not be able to run.
    with pytest.raises(GetTimeoutError):
        ray.get(many_fut, timeout=1)

    # Pass the parametrized flag through so the force and non-force
    # cancellation paths are both exercised; `recursive` defaults to True.
    ray.cancel(outer_fut, force=use_force)
    with pytest.raises(valid_exceptions(use_force)):
        ray.get(outer_fut, timeout=10)

    # All CPUs are free again only if the child task was cancelled too.
    assert ray.get(many_fut, timeout=30) == 300
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run this file's tests verbosely and hand pytest's exit code back to
    # the calling process.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
|
||||
|
||||
@@ -1575,7 +1575,7 @@ def kill(actor, *, no_restart=True):
|
||||
worker.core_worker.kill_actor(actor._ray_actor_id, no_restart)
|
||||
|
||||
|
||||
def cancel(object_ref, *, force=False):
|
||||
def cancel(object_ref, *, force=False, recursive=True):
|
||||
"""Cancels a task according to the following conditions.
|
||||
|
||||
If the specified task is pending execution, it will not be executed. If
|
||||
@@ -1595,6 +1595,8 @@ def cancel(object_ref, *, force=False):
|
||||
that should be canceled.
|
||||
force (boolean): Whether to force-kill a running task by killing
|
||||
the worker that is running the task.
|
||||
recursive (boolean): Whether to try to cancel tasks submitted by the
|
||||
task specified.
|
||||
Raises:
|
||||
TypeError: This is also raised for actor tasks.
|
||||
"""
|
||||
@@ -1605,7 +1607,7 @@ def cancel(object_ref, *, force=False):
|
||||
raise TypeError(
|
||||
"ray.cancel() only supported for non-actor object refs. "
|
||||
f"Got: {type(object_ref)}.")
|
||||
return worker.core_worker.cancel_task(object_ref, force)
|
||||
return worker.core_worker.cancel_task(object_ref, force, recursive)
|
||||
|
||||
|
||||
def _mode(worker=global_worker):
|
||||
|
||||
Reference in New Issue
Block a user