mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 21:53:18 +08:00
[Autoscaler] Display node status tag in autoscaler status (#13561)
* . * . * . * . * . * lint Co-authored-by: Alex Wu <alex@anyscale.com>
This commit is contained in:
@@ -765,7 +765,7 @@ class StandardAutoscaler:
|
||||
]
|
||||
is_pending = status in pending_states
|
||||
if is_pending:
|
||||
pending_nodes.append((ip, node_type))
|
||||
pending_nodes.append((ip, node_type, status))
|
||||
else:
|
||||
# TODO (Alex): Failed nodes are now immediately killed, so
|
||||
# this list will almost always be empty. We should ideally
|
||||
|
||||
@@ -362,8 +362,8 @@ def format_info_string(lm_summary, autoscaler_summary, time=None):
|
||||
for node_type, count in autoscaler_summary.pending_launches.items():
|
||||
line = f" {node_type}, {count} launching"
|
||||
pending_lines.append(line)
|
||||
for ip, node_type in autoscaler_summary.pending_nodes:
|
||||
line = f" {ip}: {node_type}, setting up"
|
||||
for ip, node_type, status in autoscaler_summary.pending_nodes:
|
||||
line = f" {ip}: {node_type}, {status.lower()}"
|
||||
pending_lines.append(line)
|
||||
if pending_lines:
|
||||
pending_report = "\n".join(pending_lines)
|
||||
|
||||
@@ -28,7 +28,7 @@ from ray.core.generated.common_pb2 import Bundle, PlacementStrategy
|
||||
from ray.autoscaler.tags import TAG_RAY_USER_NODE_TYPE, TAG_RAY_NODE_KIND, \
|
||||
NODE_KIND_WORKER, TAG_RAY_NODE_STATUS, \
|
||||
STATUS_UP_TO_DATE, STATUS_UNINITIALIZED, \
|
||||
STATUS_UPDATE_FAILED, \
|
||||
STATUS_UPDATE_FAILED, STATUS_WAITING_FOR_SSH, \
|
||||
NODE_KIND_HEAD, NODE_TYPE_LEGACY_WORKER, \
|
||||
NODE_TYPE_LEGACY_HEAD
|
||||
from ray.test_utils import same_elements
|
||||
@@ -1419,7 +1419,8 @@ class AutoscalingTest(unittest.TestCase):
|
||||
assert summary.active_nodes["empty_node"] == 1
|
||||
assert len(summary.active_nodes) == 2, summary.active_nodes
|
||||
|
||||
assert summary.pending_nodes == [("172.0.0.3", "p2.xlarge")]
|
||||
assert summary.pending_nodes == [("172.0.0.3", "p2.xlarge",
|
||||
STATUS_WAITING_FOR_SSH)]
|
||||
assert summary.pending_launches == {"m4.16xlarge": 2}
|
||||
|
||||
assert summary.failed_nodes == [("172.0.0.4", "m4.4xlarge")]
|
||||
@@ -2403,7 +2404,8 @@ def test_info_string():
|
||||
"p3.2xlarge": 2,
|
||||
"m4.4xlarge": 20
|
||||
},
|
||||
pending_nodes=[("1.2.3.4", "m4.4xlarge"), ("1.2.3.5", "m4.4xlarge")],
|
||||
pending_nodes=[("1.2.3.4", "m4.4xlarge", STATUS_WAITING_FOR_SSH),
|
||||
("1.2.3.5", "m4.4xlarge", STATUS_WAITING_FOR_SSH)],
|
||||
pending_launches={"m4.4xlarge": 2},
|
||||
failed_nodes=[("1.2.3.6", "p3.2xlarge")])
|
||||
|
||||
@@ -2416,8 +2418,8 @@ Healthy:
|
||||
20 m4.4xlarge
|
||||
Pending:
|
||||
m4.4xlarge, 2 launching
|
||||
1.2.3.4: m4.4xlarge, setting up
|
||||
1.2.3.5: m4.4xlarge, setting up
|
||||
1.2.3.4: m4.4xlarge, waiting-for-ssh
|
||||
1.2.3.5: m4.4xlarge, waiting-for-ssh
|
||||
Recent failures:
|
||||
(no failures)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user