mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 06:33:06 +08:00
[Autoscaler/Core] Remove autoscaler spam (#12952)
This commit is contained in:
@@ -79,27 +79,27 @@ class LoadMetrics:
|
||||
active_ips = set(active_ips)
|
||||
active_ips.add(self.local_ip)
|
||||
|
||||
def prune(mapping):
|
||||
def prune(mapping, should_log):
|
||||
unwanted = set(mapping) - active_ips
|
||||
for unwanted_key in unwanted:
|
||||
# TODO (Alex): Change this back to info after #12138.
|
||||
logger.debug("LoadMetrics: "
|
||||
"Removed mapping: {} - {}".format(
|
||||
unwanted_key, mapping[unwanted_key]))
|
||||
if should_log:
|
||||
logger.info("LoadMetrics: "
|
||||
"Removed mapping: {} - {}".format(
|
||||
unwanted_key, mapping[unwanted_key]))
|
||||
del mapping[unwanted_key]
|
||||
if unwanted:
|
||||
if unwanted and should_log:
|
||||
# TODO (Alex): Change this back to info after #12138.
|
||||
logger.debug(
|
||||
logger.info(
|
||||
"LoadMetrics: "
|
||||
"Removed {} stale ip mappings: {} not in {}".format(
|
||||
len(unwanted), unwanted, active_ips))
|
||||
assert not (unwanted & set(mapping))
|
||||
|
||||
prune(self.last_used_time_by_ip)
|
||||
prune(self.static_resources_by_ip)
|
||||
prune(self.dynamic_resources_by_ip)
|
||||
prune(self.resource_load_by_ip)
|
||||
prune(self.last_heartbeat_time_by_ip)
|
||||
prune(self.last_used_time_by_ip, should_log=True)
|
||||
prune(self.static_resources_by_ip, should_log=False)
|
||||
prune(self.dynamic_resources_by_ip, should_log=False)
|
||||
prune(self.resource_load_by_ip, should_log=False)
|
||||
prune(self.last_heartbeat_time_by_ip, should_log=False)
|
||||
|
||||
def get_node_resources(self):
|
||||
"""Return a list of node resources (static resource sizes).
|
||||
|
||||
@@ -258,6 +258,7 @@ std::shared_ptr<rpc::GcsNodeInfo> GcsNodeManager::RemoveNode(
|
||||
// Remove from cluster resources.
|
||||
gcs_resource_manager_->OnNodeDead(node_id);
|
||||
resources_buffer_.erase(node_id);
|
||||
node_resource_usages_.erase(node_id);
|
||||
if (!is_intended) {
|
||||
// Broadcast a warning to all of the drivers indicating that the node
|
||||
// has been marked as dead.
|
||||
|
||||
@@ -41,11 +41,43 @@ TEST_F(GcsNodeManagerTest, TestManagement) {
|
||||
auto node = Mocker::GenNodeInfo();
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
|
||||
{
|
||||
rpc::GetAllResourceUsageRequest request;
|
||||
rpc::GetAllResourceUsageReply reply;
|
||||
auto send_reply_callback = [](ray::Status status, std::function<void()> f1,
|
||||
std::function<void()> f2) {};
|
||||
node_manager.HandleGetAllResourceUsage(request, &reply, send_reply_callback);
|
||||
ASSERT_EQ(reply.resource_usage_data().batch().size(), 0);
|
||||
}
|
||||
|
||||
node_manager.AddNode(node);
|
||||
ASSERT_EQ(node, node_manager.GetAliveNode(node_id).value());
|
||||
|
||||
rpc::ReportResourceUsageRequest report_request;
|
||||
(*report_request.mutable_resources()->mutable_resources_available())["CPU"] = 2;
|
||||
(*report_request.mutable_resources()->mutable_resources_total())["CPU"] = 2;
|
||||
node_manager.UpdateNodeResourceUsage(node_id, report_request);
|
||||
|
||||
{
|
||||
rpc::GetAllResourceUsageRequest request;
|
||||
rpc::GetAllResourceUsageReply reply;
|
||||
auto send_reply_callback = [](ray::Status status, std::function<void()> f1,
|
||||
std::function<void()> f2) {};
|
||||
node_manager.HandleGetAllResourceUsage(request, &reply, send_reply_callback);
|
||||
ASSERT_EQ(reply.resource_usage_data().batch().size(), 1);
|
||||
}
|
||||
|
||||
node_manager.RemoveNode(node_id);
|
||||
ASSERT_TRUE(!node_manager.GetAliveNode(node_id).has_value());
|
||||
|
||||
{
|
||||
rpc::GetAllResourceUsageRequest request;
|
||||
rpc::GetAllResourceUsageReply reply;
|
||||
auto send_reply_callback = [](ray::Status status, std::function<void()> f1,
|
||||
std::function<void()> f2) {};
|
||||
node_manager.HandleGetAllResourceUsage(request, &reply, send_reply_callback);
|
||||
ASSERT_EQ(reply.resource_usage_data().batch().size(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(GcsNodeManagerTest, TestListener) {
|
||||
|
||||
Reference in New Issue
Block a user