From 63ad2e3340cc7512b41163506d420ba433d4e77c Mon Sep 17 00:00:00 2001 From: Max Fitton Date: Sat, 29 Aug 2020 23:18:23 -0700 Subject: [PATCH] [Dashboard] Fix Issue #10319 - Dashboard autoscaler crash (#10323) * Patch an error that occurred when the dashboard's internal logs or errors data structures still held an entry for a worker that had been removed from the cluster. This would crash the cluster with a KeyError. * lint Co-authored-by: Max Fitton --- python/ray/dashboard/node_stats.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/python/ray/dashboard/node_stats.py b/python/ray/dashboard/node_stats.py index 63eb420e8..2ad1b3eca 100644 --- a/python/ray/dashboard/node_stats.py +++ b/python/ray/dashboard/node_stats.py @@ -60,14 +60,22 @@ class NodeStats(threading.Thread): def _insert_log_counts(self): for ip, logs_by_pid in self._logs.items(): hostname = self._ip_to_hostname[ip] - logs_by_pid = {pid: len(logs) for pid, logs in logs_by_pid.items()} - self._node_stats[hostname]["log_count"] = logs_by_pid + if hostname in self._node_stats: + logs_by_pid = { + pid: len(logs) + for pid, logs in logs_by_pid.items() + } + self._node_stats[hostname]["log_count"] = logs_by_pid def _insert_error_counts(self): for ip, errs_by_pid in self._errors.items(): hostname = self._ip_to_hostname[ip] - errs_by_pid = {pid: len(errs) for pid, errs in errs_by_pid.items()} - self._node_stats[hostname]["error_count"] = errs_by_pid + if hostname in self._node_stats: + errs_by_pid = { + pid: len(errs) + for pid, errs in errs_by_pid.items() + } + self._node_stats[hostname]["error_count"] = errs_by_pid def _purge_outdated_stats(self): def current(then, now):