[core] Fix bug where actor tasks from a reconstructed actor are ignored by the scheduling queue (#7637)

This commit is contained in:
Zhijun Fu
2020-03-21 13:05:24 +08:00
committed by GitHub
parent 1b90196bef
commit a7a5d172b1
10 changed files with 384 additions and 35 deletions
+70
View File
@@ -207,6 +207,76 @@ def test_actor_reconstruction_without_task(ray_start_regular):
assert wait_for_condition(check_reconstructed)
def test_caller_actor_reconstruction(ray_start_regular):
    """Check that calls issued by a reconstructed actor are still served.

    A caller actor forwards ``increase`` calls to a counter actor. The caller
    is killed after three calls; the next call made through the caller is
    expected to reach the counter actor and return 4.
    """

    @ray.remote(max_reconstructions=1)
    class ReconstructableActor:
        """Caller actor, declared with ``max_reconstructions=1``."""

        def __init__(self, actor):
            # Handle of the downstream actor that every call is forwarded to.
            self.actor = actor

        def increase(self):
            # Block on the downstream call and hand its result back.
            pending = self.actor.increase.remote()
            return ray.get(pending)

        def get_pid(self):
            # NOTE(review): presumably what kill_actor uses to locate the
            # process to kill -- confirm against the helper's definition.
            return os.getpid()

    @ray.remote(max_reconstructions=1)
    class Actor:
        """Counter actor, declared with ``max_reconstructions=1``."""

        def __init__(self):
            self.value = 0

        def increase(self):
            self.value += 1
            return self.value

    counter = Actor.remote()
    caller = ReconstructableActor.remote(counter)

    # Drive the counter up to 3 through the caller, one blocking call at a
    # time.
    for _ in range(3):
        ray.get(caller.increase.remote())

    # Kill the caller actor.
    # TODO(zhijunfu): use ray.kill instead.
    kill_actor(caller)

    # The caller must remain usable; the expected result shows the counter
    # carrying on from 3.
    assert ray.get(caller.increase.remote()) == 4
def test_caller_task_reconstruction(ray_start_regular):
    """Check that a retried task from a dead worker is still served.

    The task increments a counter actor and exits its own process unless the
    returned value exceeds 2, so the test expects the attempt that finally
    returns to observe the value 3.
    """

    @ray.remote(max_retries=5)
    def RetryableTask(actor):
        observed = ray.get(actor.increase.remote())
        if observed <= 2:
            # Exit the process immediately instead of returning; the task is
            # declared with max_retries=5.
            os._exit(0)
        return observed

    @ray.remote(max_reconstructions=1)
    class Actor:
        """Counter actor, declared with ``max_reconstructions=1``."""

        def __init__(self):
            self.value = 0

        def increase(self):
            self.value += 1
            return self.value

    counter = Actor.remote()
    outcome = ray.get(RetryableTask.remote(counter))
    assert outcome == 3
def test_actor_reconstruction_on_node_failure(ray_start_cluster_head):
"""Test actor reconstruction when node dies unexpectedly."""
cluster = ray_start_cluster_head