mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 02:01:24 +08:00
[autoscaler] hotfix calculate_node_resources (#10874)
This commit is contained in:
@@ -56,6 +56,7 @@ class ResourceDemandScheduler:
|
||||
nodes: List of existing nodes in the cluster.
|
||||
pending_nodes: Summary of node types currently being launched.
|
||||
resource_demands: Vector of resource demands from the scheduler.
|
||||
usage_by_ip: Mapping from ip to available resources.
|
||||
"""
|
||||
|
||||
if resource_demands is None:
|
||||
@@ -113,7 +114,6 @@ class ResourceDemandScheduler:
|
||||
tags = self.provider.node_tags(node_id)
|
||||
if TAG_RAY_USER_NODE_TYPE in tags:
|
||||
node_type = tags[TAG_RAY_USER_NODE_TYPE]
|
||||
node_type_counts[node_type] += 1
|
||||
ip = self.provider.internal_ip(node_id)
|
||||
available_resources = usage_by_ip.get(ip)
|
||||
add_node(node_type, available_resources)
|
||||
|
||||
@@ -180,6 +180,27 @@ def test_get_nodes_to_launch_limits():
|
||||
assert to_launch == []
|
||||
|
||||
|
||||
def test_calculate_node_resources():
|
||||
provider = MockProvider()
|
||||
scheduler = ResourceDemandScheduler(provider, TYPES_A, 10)
|
||||
|
||||
provider.create_node({}, {TAG_RAY_USER_NODE_TYPE: "p2.8xlarge"}, 2)
|
||||
|
||||
nodes = provider.non_terminated_nodes({})
|
||||
|
||||
ips = provider.non_terminated_node_ips({})
|
||||
# 2 free p2.8xls
|
||||
utilizations = {ip: {"GPU": 8} for ip in ips}
|
||||
# 1 more on the way
|
||||
pending_nodes = {"p2.8xlarge": 1}
|
||||
# requires 4 p2.8xls (only 3 are in cluster/pending)
|
||||
demands = [{"GPU": 8}] * (len(utilizations) + 2)
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, pending_nodes, demands,
|
||||
utilizations)
|
||||
|
||||
assert to_launch == [("p2.8xlarge", 1)]
|
||||
|
||||
|
||||
class LoadMetricsTest(unittest.TestCase):
|
||||
def testResourceDemandVector(self):
|
||||
lm = LoadMetrics()
|
||||
|
||||
Reference in New Issue
Block a user