mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 19:49:04 +08:00
[core] Disable GCS reconnect (#10579)
* Set default GCS retries to 1
* Fix cc test
This commit is contained in:
@@ -21,8 +21,10 @@ def increase(x):
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ray_start_regular",
|
||||
[generate_system_config_map(num_heartbeats_timeout=20)],
|
||||
"ray_start_regular", [
|
||||
generate_system_config_map(
|
||||
num_heartbeats_timeout=20, ping_gcs_rpc_server_max_retries=60)
|
||||
],
|
||||
indirect=True)
|
||||
def test_gcs_server_restart(ray_start_regular):
|
||||
actor1 = Increase.remote()
|
||||
@@ -44,8 +46,10 @@ def test_gcs_server_restart(ray_start_regular):
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ray_start_regular",
|
||||
[generate_system_config_map(num_heartbeats_timeout=20)],
|
||||
"ray_start_regular", [
|
||||
generate_system_config_map(
|
||||
num_heartbeats_timeout=20, ping_gcs_rpc_server_max_retries=60)
|
||||
],
|
||||
indirect=True)
|
||||
def test_gcs_server_restart_during_actor_creation(ray_start_regular):
|
||||
ids = []
|
||||
@@ -63,8 +67,10 @@ def test_gcs_server_restart_during_actor_creation(ray_start_regular):
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ray_start_cluster_head",
|
||||
[generate_system_config_map(num_heartbeats_timeout=20)],
|
||||
"ray_start_cluster_head", [
|
||||
generate_system_config_map(
|
||||
num_heartbeats_timeout=20, ping_gcs_rpc_server_max_retries=60)
|
||||
],
|
||||
indirect=True)
|
||||
def test_node_failure_detector_when_gcs_server_restart(ray_start_cluster_head):
|
||||
"""Checks that the node failure detector is correct when gcs server restart.
|
||||
|
||||
@@ -8,8 +8,22 @@ import ray
|
||||
from ray.test_utils import (
|
||||
RayTestTimeoutException, check_call_ray, run_string_as_driver,
|
||||
run_string_as_driver_nonblocking, wait_for_children_of_pid,
|
||||
wait_for_children_of_pid_to_exit, kill_process_by_name, Semaphore,
|
||||
init_error_pubsub, get_error_message)
|
||||
wait_for_children_of_pid_to_exit, wait_for_condition, kill_process_by_name,
|
||||
Semaphore, init_error_pubsub, get_error_message)
|
||||
|
||||
|
||||
def test_remote_raylet_cleanup(ray_start_cluster):
|
||||
cluster = ray_start_cluster
|
||||
cluster.add_node()
|
||||
cluster.add_node()
|
||||
cluster.add_node()
|
||||
cluster.wait_for_nodes()
|
||||
|
||||
def remote_raylets_dead():
|
||||
return not cluster.remaining_processes_alive()
|
||||
|
||||
cluster.remove_node(cluster.head_node, allow_graceful=False)
|
||||
wait_for_condition(remote_raylets_dead)
|
||||
|
||||
|
||||
def test_error_isolation(call_ray_start):
|
||||
|
||||
Reference in New Issue
Block a user