mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 12:10:40 +08:00
Revert heartbeat interval to keep CI stable (#12836)
* Revert heartbeat interval to keep CI stable
* Fix missing one
This commit is contained in:
@@ -237,7 +237,7 @@ def test_actor_multiple_gpus_from_multiple_tasks(ray_start_cluster):
|
||||
cluster.add_node(
|
||||
num_cpus=10 * num_gpus_per_raylet,
|
||||
num_gpus=num_gpus_per_raylet,
|
||||
_system_config={"num_heartbeats_timeout": 100} if i == 0 else {})
|
||||
_system_config={"num_heartbeats_timeout": 1000} if i == 0 else {})
|
||||
ray.init(address=cluster.address)
|
||||
|
||||
@ray.remote
|
||||
|
||||
@@ -610,10 +610,9 @@ def test_lease_request_leak(shutdown_only):
|
||||
del obj_ref
|
||||
ray.get(tasks)
|
||||
|
||||
def _no_objects():
|
||||
return len(ray.objects()) == 0
|
||||
|
||||
wait_for_condition(_no_objects, timeout=10)
|
||||
time.sleep(
|
||||
1) # Sleep for an amount longer than the reconstruction timeout.
|
||||
assert len(ray.objects()) == 0, ray.objects()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
||||
@@ -143,7 +143,7 @@ def check_components_alive(cluster, component_type, check_component_alive):
|
||||
"num_cpus": 8,
|
||||
"num_nodes": 4,
|
||||
"_system_config": {
|
||||
"num_heartbeats_timeout": 10
|
||||
"num_heartbeats_timeout": 100
|
||||
},
|
||||
}],
|
||||
indirect=True)
|
||||
|
||||
@@ -76,7 +76,7 @@ def test_gcs_server_restart_during_actor_creation(ray_start_regular):
|
||||
@pytest.mark.parametrize(
|
||||
"ray_start_cluster_head", [
|
||||
generate_system_config_map(
|
||||
num_heartbeats_timeout=2, ping_gcs_rpc_server_max_retries=60)
|
||||
num_heartbeats_timeout=20, ping_gcs_rpc_server_max_retries=60)
|
||||
],
|
||||
indirect=True)
|
||||
def test_node_failure_detector_when_gcs_server_restart(ray_start_cluster_head):
|
||||
|
||||
@@ -34,7 +34,7 @@ def test_shutdown():
|
||||
@pytest.mark.parametrize(
|
||||
"ray_start_cluster_head", [
|
||||
generate_system_config_map(
|
||||
num_heartbeats_timeout=2, object_timeout_milliseconds=12345)
|
||||
num_heartbeats_timeout=20, object_timeout_milliseconds=12345)
|
||||
],
|
||||
indirect=True)
|
||||
def test_system_config(ray_start_cluster_head):
|
||||
@@ -52,12 +52,12 @@ def test_system_config(ray_start_cluster_head):
|
||||
@ray.remote
|
||||
def f():
|
||||
assert ray._config.object_timeout_milliseconds() == 12345
|
||||
assert ray._config.num_heartbeats_timeout() == 2
|
||||
assert ray._config.num_heartbeats_timeout() == 20
|
||||
|
||||
ray.get([f.remote() for _ in range(5)])
|
||||
|
||||
cluster.remove_node(worker, allow_graceful=False)
|
||||
time.sleep(0.9)
|
||||
time.sleep(1)
|
||||
assert ray.cluster_resources()["CPU"] == 2
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
@@ -1172,7 +1172,7 @@ ray.shutdown()
|
||||
@pytest.mark.parametrize(
|
||||
"ray_start_cluster_head", [
|
||||
generate_system_config_map(
|
||||
num_heartbeats_timeout=3, ping_gcs_rpc_server_max_retries=60)
|
||||
num_heartbeats_timeout=20, ping_gcs_rpc_server_max_retries=60)
|
||||
],
|
||||
indirect=True)
|
||||
def test_create_placement_group_after_gcs_server_restart(
|
||||
@@ -1210,7 +1210,7 @@ def test_create_placement_group_after_gcs_server_restart(
|
||||
@pytest.mark.parametrize(
|
||||
"ray_start_cluster_head", [
|
||||
generate_system_config_map(
|
||||
num_heartbeats_timeout=3, ping_gcs_rpc_server_max_retries=60)
|
||||
num_heartbeats_timeout=20, ping_gcs_rpc_server_max_retries=60)
|
||||
],
|
||||
indirect=True)
|
||||
def test_create_actor_with_placement_group_after_gcs_server_restart(
|
||||
@@ -1234,7 +1234,7 @@ def test_create_actor_with_placement_group_after_gcs_server_restart(
|
||||
@pytest.mark.parametrize(
|
||||
"ray_start_cluster_head", [
|
||||
generate_system_config_map(
|
||||
num_heartbeats_timeout=3, ping_gcs_rpc_server_max_retries=60)
|
||||
num_heartbeats_timeout=20, ping_gcs_rpc_server_max_retries=60)
|
||||
],
|
||||
indirect=True)
|
||||
def test_create_placement_group_during_gcs_server_restart(
|
||||
|
||||
Reference in New Issue
Block a user