diff --git a/python/ray/autoscaler/_private/load_metrics.py b/python/ray/autoscaler/_private/load_metrics.py index 1fadeae3b..b688fe617 100644 --- a/python/ray/autoscaler/_private/load_metrics.py +++ b/python/ray/autoscaler/_private/load_metrics.py @@ -79,27 +79,27 @@ class LoadMetrics: active_ips = set(active_ips) active_ips.add(self.local_ip) - def prune(mapping): + def prune(mapping, should_log): unwanted = set(mapping) - active_ips for unwanted_key in unwanted: - # TODO (Alex): Change this back to info after #12138. - logger.debug("LoadMetrics: " - "Removed mapping: {} - {}".format( - unwanted_key, mapping[unwanted_key])) + if should_log: + logger.info("LoadMetrics: " + "Removed mapping: {} - {}".format( + unwanted_key, mapping[unwanted_key])) del mapping[unwanted_key] - if unwanted: + if unwanted and should_log: # TODO (Alex): Change this back to info after #12138. - logger.debug( + logger.info( "LoadMetrics: " "Removed {} stale ip mappings: {} not in {}".format( len(unwanted), unwanted, active_ips)) assert not (unwanted & set(mapping)) - prune(self.last_used_time_by_ip) - prune(self.static_resources_by_ip) - prune(self.dynamic_resources_by_ip) - prune(self.resource_load_by_ip) - prune(self.last_heartbeat_time_by_ip) + prune(self.last_used_time_by_ip, should_log=True) + prune(self.static_resources_by_ip, should_log=False) + prune(self.dynamic_resources_by_ip, should_log=False) + prune(self.resource_load_by_ip, should_log=False) + prune(self.last_heartbeat_time_by_ip, should_log=False) def get_node_resources(self): """Return a list of node resources (static resource sizes). diff --git a/src/ray/gcs/gcs_server/gcs_node_manager.cc b/src/ray/gcs/gcs_server/gcs_node_manager.cc index 820d3a723..499abc90f 100644 --- a/src/ray/gcs/gcs_server/gcs_node_manager.cc +++ b/src/ray/gcs/gcs_server/gcs_node_manager.cc @@ -258,6 +258,7 @@ std::shared_ptr GcsNodeManager::RemoveNode( // Remove from cluster resources. gcs_resource_manager_->OnNodeDead(node_id); resources_buffer_.erase(node_id); + node_resource_usages_.erase(node_id); if (!is_intended) { // Broadcast a warning to all of the drivers indicating that the node // has been marked as dead. diff --git a/src/ray/gcs/gcs_server/test/gcs_node_manager_test.cc b/src/ray/gcs/gcs_server/test/gcs_node_manager_test.cc index a904512ac..74c4b8fd1 100644 --- a/src/ray/gcs/gcs_server/test/gcs_node_manager_test.cc +++ b/src/ray/gcs/gcs_server/test/gcs_node_manager_test.cc @@ -41,11 +41,43 @@ TEST_F(GcsNodeManagerTest, TestManagement) { auto node = Mocker::GenNodeInfo(); auto node_id = NodeID::FromBinary(node->node_id()); + { + rpc::GetAllResourceUsageRequest request; + rpc::GetAllResourceUsageReply reply; + auto send_reply_callback = [](ray::Status status, std::function f1, + std::function f2) {}; + node_manager.HandleGetAllResourceUsage(request, &reply, send_reply_callback); + ASSERT_EQ(reply.resource_usage_data().batch().size(), 0); + } + node_manager.AddNode(node); ASSERT_EQ(node, node_manager.GetAliveNode(node_id).value()); + rpc::ReportResourceUsageRequest report_request; + (*report_request.mutable_resources()->mutable_resources_available())["CPU"] = 2; + (*report_request.mutable_resources()->mutable_resources_total())["CPU"] = 2; + node_manager.UpdateNodeResourceUsage(node_id, report_request); + + { + rpc::GetAllResourceUsageRequest request; + rpc::GetAllResourceUsageReply reply; + auto send_reply_callback = [](ray::Status status, std::function f1, + std::function f2) {}; + node_manager.HandleGetAllResourceUsage(request, &reply, send_reply_callback); + ASSERT_EQ(reply.resource_usage_data().batch().size(), 1); + } + node_manager.RemoveNode(node_id); ASSERT_TRUE(!node_manager.GetAliveNode(node_id).has_value()); + + { + rpc::GetAllResourceUsageRequest request; + rpc::GetAllResourceUsageReply reply; + auto send_reply_callback = [](ray::Status status, std::function f1, + std::function f2) {}; + node_manager.HandleGetAllResourceUsage(request, &reply, send_reply_callback); + ASSERT_EQ(reply.resource_usage_data().batch().size(), 0); + } } TEST_F(GcsNodeManagerTest, TestListener) {