mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 10:01:11 +08:00
Fix actor reconstruction with direct call (#6570)
This commit is contained in:
@@ -836,7 +836,6 @@ def test_actor_init_fails(ray_start_cluster_head):
|
||||
assert results == [1 for actor in actors]
|
||||
|
||||
|
||||
@pytest.mark.skipif(RAY_FORCE_DIRECT, reason="no ft yet")
|
||||
def test_reconstruction_suppression(ray_start_cluster_head):
|
||||
cluster = ray_start_cluster_head
|
||||
num_nodes = 5
|
||||
|
||||
@@ -12,6 +12,8 @@ import pytest
|
||||
import ray
|
||||
import ray.ray_constants as ray_constants
|
||||
|
||||
RAY_FORCE_DIRECT = ray_constants.direct_call_enabled()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ray_start_cluster", [{
|
||||
@@ -29,13 +31,16 @@ def test_actor_creation_node_failure(ray_start_cluster):
|
||||
def __init__(self, death_probability):
|
||||
self.death_probability = death_probability
|
||||
|
||||
def get_probability(self):
|
||||
return self.death_probability
|
||||
|
||||
def ping(self):
|
||||
# Exit process with some probability.
|
||||
exit_chance = np.random.rand()
|
||||
if exit_chance < self.death_probability:
|
||||
sys.exit(-1)
|
||||
|
||||
num_children = 50
|
||||
num_children = 25
|
||||
# Children actors will die about half the time.
|
||||
death_probability = 0.5
|
||||
|
||||
@@ -58,6 +63,21 @@ def test_actor_creation_node_failure(ray_start_cluster):
|
||||
ray.get(out)
|
||||
except ray.exceptions.RayActorError:
|
||||
children[i] = Child.remote(death_probability)
|
||||
|
||||
if (RAY_FORCE_DIRECT):
|
||||
children_out = [
|
||||
child.get_probability.remote() for child in children
|
||||
]
|
||||
# Wait for new created actors to finish creation before
|
||||
# removing a node. This is needed because right now we don't
|
||||
# support reconstructing actors that died in the process of
|
||||
# being created.
|
||||
ready, _ = ray.wait(
|
||||
children_out,
|
||||
num_returns=len(children_out),
|
||||
timeout=5 * 60.0)
|
||||
assert len(ready) == len(children_out)
|
||||
|
||||
# Remove a node. Any actor creation tasks that were forwarded to this
|
||||
# node must be reconstructed.
|
||||
cluster.remove_node(cluster.list_all_nodes()[-1])
|
||||
|
||||
Reference in New Issue
Block a user