From b9ac3878ae615eb1926e00ec0a9a5fa67c27c30e Mon Sep 17 00:00:00 2001 From: Alex Wu Date: Wed, 20 Jan 2021 19:20:54 -0800 Subject: [PATCH] [Autoscaler] Display node status tag in autoscaler status (#13561) * . * . * . * . * . * lint Co-authored-by: Alex Wu --- python/ray/autoscaler/_private/autoscaler.py | 2 +- python/ray/autoscaler/_private/util.py | 4 ++-- python/ray/tests/test_resource_demand_scheduler.py | 12 +++++++----- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/python/ray/autoscaler/_private/autoscaler.py b/python/ray/autoscaler/_private/autoscaler.py index 2838e24c1..1166597ed 100644 --- a/python/ray/autoscaler/_private/autoscaler.py +++ b/python/ray/autoscaler/_private/autoscaler.py @@ -765,7 +765,7 @@ class StandardAutoscaler: ] is_pending = status in pending_states if is_pending: - pending_nodes.append((ip, node_type)) + pending_nodes.append((ip, node_type, status)) else: # TODO (Alex): Failed nodes are now immediately killed, so # this list will almost always be empty. 
We should ideally diff --git a/python/ray/autoscaler/_private/util.py b/python/ray/autoscaler/_private/util.py index 81a2c1fc0..1e677e35b 100644 --- a/python/ray/autoscaler/_private/util.py +++ b/python/ray/autoscaler/_private/util.py @@ -362,8 +362,8 @@ def format_info_string(lm_summary, autoscaler_summary, time=None): for node_type, count in autoscaler_summary.pending_launches.items(): line = f" {node_type}, {count} launching" pending_lines.append(line) - for ip, node_type in autoscaler_summary.pending_nodes: - line = f" {ip}: {node_type}, setting up" + for ip, node_type, status in autoscaler_summary.pending_nodes: + line = f" {ip}: {node_type}, {status.lower()}" pending_lines.append(line) if pending_lines: pending_report = "\n".join(pending_lines) diff --git a/python/ray/tests/test_resource_demand_scheduler.py b/python/ray/tests/test_resource_demand_scheduler.py index 4b2027af1..3bfe28f7c 100644 --- a/python/ray/tests/test_resource_demand_scheduler.py +++ b/python/ray/tests/test_resource_demand_scheduler.py @@ -28,7 +28,7 @@ from ray.core.generated.common_pb2 import Bundle, PlacementStrategy from ray.autoscaler.tags import TAG_RAY_USER_NODE_TYPE, TAG_RAY_NODE_KIND, \ NODE_KIND_WORKER, TAG_RAY_NODE_STATUS, \ STATUS_UP_TO_DATE, STATUS_UNINITIALIZED, \ - STATUS_UPDATE_FAILED, \ + STATUS_UPDATE_FAILED, STATUS_WAITING_FOR_SSH, \ NODE_KIND_HEAD, NODE_TYPE_LEGACY_WORKER, \ NODE_TYPE_LEGACY_HEAD from ray.test_utils import same_elements @@ -1419,7 +1419,8 @@ class AutoscalingTest(unittest.TestCase): assert summary.active_nodes["empty_node"] == 1 assert len(summary.active_nodes) == 2, summary.active_nodes - assert summary.pending_nodes == [("172.0.0.3", "p2.xlarge")] + assert summary.pending_nodes == [("172.0.0.3", "p2.xlarge", + STATUS_WAITING_FOR_SSH)] assert summary.pending_launches == {"m4.16xlarge": 2} assert summary.failed_nodes == [("172.0.0.4", "m4.4xlarge")] @@ -2403,7 +2404,8 @@ def test_info_string(): "p3.2xlarge": 2, "m4.4xlarge": 20 }, - 
pending_nodes=[("1.2.3.4", "m4.4xlarge"), ("1.2.3.5", "m4.4xlarge")], + pending_nodes=[("1.2.3.4", "m4.4xlarge", STATUS_WAITING_FOR_SSH), + ("1.2.3.5", "m4.4xlarge", STATUS_WAITING_FOR_SSH)], pending_launches={"m4.4xlarge": 2}, failed_nodes=[("1.2.3.6", "p3.2xlarge")]) @@ -2416,8 +2418,8 @@ Healthy: 20 m4.4xlarge Pending: m4.4xlarge, 2 launching - 1.2.3.4: m4.4xlarge, setting up - 1.2.3.5: m4.4xlarge, setting up + 1.2.3.4: m4.4xlarge, waiting-for-ssh + 1.2.3.5: m4.4xlarge, waiting-for-ssh Recent failures: (no failures)