[GCS] Fix missed PollOwnerForActorOutOfScope call after GCS restart (#11054)

* fix_RemoveActorFromOwner_crash_bug

* fix review comment

* fix review comment

* rm unused ut

* add testcase

* fix review comment

* rm unused import

* fix code style

* fix ut bug

Co-authored-by: 灵洵 <fengbin.ffb@antfin.com>
This commit is contained in:
fangfengbin
2020-09-29 01:06:40 +08:00
committed by GitHub
parent 79c6a6fa02
commit 872219940b
3 changed files with 49 additions and 14 deletions
@@ -126,6 +126,39 @@ def test_node_failure_detector_when_gcs_server_restart(ray_start_cluster_head):
wait_for_condition(condition, timeout=10)
@pytest.mark.parametrize(
    "ray_start_regular",
    [
        generate_system_config_map(
            num_heartbeats_timeout=20,
            ping_gcs_rpc_server_max_retries=60,
        )
    ],
    indirect=True,
)
def test_del_actor_after_gcs_server_restart(ray_start_regular):
    """Check that a named actor is cleaned up when deleted after a GCS restart.

    The test creates an actor named "abc", restarts the GCS server, drops the
    local handle, and then expects the actor to reach the DEAD state and its
    name to no longer be resolvable via ``ray.get_actor``.
    """
    named_actor = Increase.options(name="abc").remote()
    assert ray.get(named_actor.method.remote(1)) == 3

    # Restart the GCS server while the actor handle is still held.
    node = ray.worker._global_node
    node.kill_gcs_server()
    node.start_gcs_server()

    # Remember the id before dropping the handle so the actor table can
    # still be queried afterwards.
    actor_id = named_actor._actor_id.hex()
    del named_actor

    def actor_is_dead():
        """Return True once the actor table reports the actor as DEAD."""
        state = ray.actors(actor_id=actor_id)["State"]
        return state == ray.gcs_utils.ActorTableData.DEAD

    # Poll until the actor is reported dead.
    wait_for_condition(actor_is_dead, timeout=10)

    # A successful `PollOwnerForActorOutOfScope` call should also have
    # removed the actor's name, so looking it up must fail.
    with pytest.raises(ValueError):
        ray.get_actor("abc")
if __name__ == "__main__":
    # When executed as a script, run this file's tests through pytest in
    # verbose mode and use pytest's return value as the process exit status.
    import pytest

    exit_status = pytest.main(["-v", __file__])
    sys.exit(exit_status)