[Autoscaler] Display node status tag in autoscaler status (#13561)

* .

* .

* .

* .

* .

* lint

Co-authored-by: Alex Wu <alex@anyscale.com>
This commit is contained in:
Alex Wu
2021-01-20 19:20:54 -08:00
committed by GitHub
parent a09997dc9e
commit b9ac3878ae
3 changed files with 10 additions and 8 deletions
+1 -1
View File
@@ -765,7 +765,7 @@ class StandardAutoscaler:
]
is_pending = status in pending_states
if is_pending:
pending_nodes.append((ip, node_type))
pending_nodes.append((ip, node_type, status))
else:
# TODO (Alex): Failed nodes are now immediately killed, so
# this list will almost always be empty. We should ideally
+2 -2
View File
@@ -362,8 +362,8 @@ def format_info_string(lm_summary, autoscaler_summary, time=None):
for node_type, count in autoscaler_summary.pending_launches.items():
line = f" {node_type}, {count} launching"
pending_lines.append(line)
for ip, node_type in autoscaler_summary.pending_nodes:
line = f" {ip}: {node_type}, setting up"
for ip, node_type, status in autoscaler_summary.pending_nodes:
line = f" {ip}: {node_type}, {status.lower()}"
pending_lines.append(line)
if pending_lines:
pending_report = "\n".join(pending_lines)
@@ -28,7 +28,7 @@ from ray.core.generated.common_pb2 import Bundle, PlacementStrategy
from ray.autoscaler.tags import TAG_RAY_USER_NODE_TYPE, TAG_RAY_NODE_KIND, \
NODE_KIND_WORKER, TAG_RAY_NODE_STATUS, \
STATUS_UP_TO_DATE, STATUS_UNINITIALIZED, \
STATUS_UPDATE_FAILED, \
STATUS_UPDATE_FAILED, STATUS_WAITING_FOR_SSH, \
NODE_KIND_HEAD, NODE_TYPE_LEGACY_WORKER, \
NODE_TYPE_LEGACY_HEAD
from ray.test_utils import same_elements
@@ -1419,7 +1419,8 @@ class AutoscalingTest(unittest.TestCase):
assert summary.active_nodes["empty_node"] == 1
assert len(summary.active_nodes) == 2, summary.active_nodes
assert summary.pending_nodes == [("172.0.0.3", "p2.xlarge")]
assert summary.pending_nodes == [("172.0.0.3", "p2.xlarge",
STATUS_WAITING_FOR_SSH)]
assert summary.pending_launches == {"m4.16xlarge": 2}
assert summary.failed_nodes == [("172.0.0.4", "m4.4xlarge")]
@@ -2403,7 +2404,8 @@ def test_info_string():
"p3.2xlarge": 2,
"m4.4xlarge": 20
},
pending_nodes=[("1.2.3.4", "m4.4xlarge"), ("1.2.3.5", "m4.4xlarge")],
pending_nodes=[("1.2.3.4", "m4.4xlarge", STATUS_WAITING_FOR_SSH),
("1.2.3.5", "m4.4xlarge", STATUS_WAITING_FOR_SSH)],
pending_launches={"m4.4xlarge": 2},
failed_nodes=[("1.2.3.6", "p3.2xlarge")])
@@ -2416,8 +2418,8 @@ Healthy:
20 m4.4xlarge
Pending:
m4.4xlarge, 2 launching
1.2.3.4: m4.4xlarge, setting up
1.2.3.5: m4.4xlarge, setting up
1.2.3.4: m4.4xlarge, waiting-for-ssh
1.2.3.5: m4.4xlarge, waiting-for-ssh
Recent failures:
(no failures)