mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 10:49:16 +08:00
[Autoscaler] Placement group autoscaling (#11243)
This commit is contained in:
@@ -211,11 +211,14 @@ class StandardAutoscaler:
|
||||
if self.resource_demand_scheduler:
|
||||
resource_demand_vector = self.resource_demand_vector + \
|
||||
self.load_metrics.get_resource_demand_vector()
|
||||
to_launch = (self.resource_demand_scheduler.get_nodes_to_launch(
|
||||
pending_placement_groups = \
|
||||
self.load_metrics.get_pending_placement_groups()
|
||||
to_launch = self.resource_demand_scheduler.get_nodes_to_launch(
|
||||
self.provider.non_terminated_nodes(tag_filters={}),
|
||||
self.pending_launches.breakdown(),
|
||||
resource_demand_vector,
|
||||
self.load_metrics.get_resource_utilization()))
|
||||
self.load_metrics.get_resource_utilization(),
|
||||
pending_placement_groups)
|
||||
for node_type, count in to_launch.items():
|
||||
self.launch_new_node(count, node_type=node_type)
|
||||
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, List
|
||||
|
||||
import numpy as np
|
||||
import ray._private.services as services
|
||||
from ray.autoscaler._private.constants import MEMORY_RESOURCE_UNIT_BYTES
|
||||
from ray.gcs_utils import PlacementGroupTableData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -26,16 +28,18 @@ class LoadMetrics:
|
||||
) if local_ip is None else local_ip
|
||||
self.waiting_bundles = []
|
||||
self.infeasible_bundles = []
|
||||
self.pending_placement_groups = []
|
||||
|
||||
def update(self,
|
||||
ip,
|
||||
static_resources,
|
||||
update_dynamic_resources,
|
||||
dynamic_resources,
|
||||
update_resource_load,
|
||||
resource_load,
|
||||
waiting_bundles=None,
|
||||
infeasible_bundles=None):
|
||||
ip: str,
|
||||
static_resources: Dict[str, Dict],
|
||||
update_dynamic_resources: bool,
|
||||
dynamic_resources: Dict[str, Dict],
|
||||
update_resource_load: bool,
|
||||
resource_load: Dict[str, Dict],
|
||||
waiting_bundles: List[Dict[str, float]] = None,
|
||||
infeasible_bundles: List[Dict[str, float]] = None,
|
||||
pending_placement_groups: List[PlacementGroupTableData] = None):
|
||||
# If light heartbeat enabled, only resources changed will be received.
|
||||
# We should update the changed part and compare static_resources with
|
||||
# dynamic_resources using those updated.
|
||||
@@ -50,6 +54,8 @@ class LoadMetrics:
|
||||
waiting_bundles = []
|
||||
if not infeasible_bundles:
|
||||
infeasible_bundles = []
|
||||
if not pending_placement_groups:
|
||||
pending_placement_groups = []
|
||||
|
||||
# We are not guaranteed to have a corresponding dynamic resource
|
||||
# for every static resource because dynamic resources are based on
|
||||
@@ -69,6 +75,7 @@ class LoadMetrics:
|
||||
self.last_heartbeat_time_by_ip[ip] = now
|
||||
self.waiting_bundles = waiting_bundles
|
||||
self.infeasible_bundles = infeasible_bundles
|
||||
self.pending_placement_groups = pending_placement_groups
|
||||
|
||||
def mark_active(self, ip):
|
||||
assert ip is not None, "IP should be known at this time"
|
||||
@@ -159,6 +166,9 @@ class LoadMetrics:
|
||||
def get_resource_demand_vector(self):
|
||||
return self.waiting_bundles + self.infeasible_bundles
|
||||
|
||||
def get_pending_placement_groups(self):
|
||||
return self.pending_placement_groups
|
||||
|
||||
def info_string(self):
|
||||
return " - " + "\n - ".join(
|
||||
["{}: {}".format(k, v) for k, v in sorted(self._info().items())])
|
||||
|
||||
@@ -11,9 +11,12 @@ import copy
|
||||
import numpy as np
|
||||
import logging
|
||||
import collections
|
||||
from numbers import Number
|
||||
from typing import List, Dict
|
||||
|
||||
from ray.autoscaler.node_provider import NodeProvider
|
||||
from ray.gcs_utils import PlacementGroupTableData
|
||||
from ray.core.generated.common_pb2 import PlacementStrategy
|
||||
from ray.autoscaler.tags import TAG_RAY_USER_NODE_TYPE, NODE_KIND_UNMANAGED, \
|
||||
STATUS_UPDATE_FAILED, STATUS_UP_TO_DATE, TAG_RAY_NODE_STATUS
|
||||
|
||||
@@ -26,7 +29,7 @@ NodeType = str
|
||||
NodeTypeConfigDict = str
|
||||
|
||||
# e.g., {"GPU": 1}.
|
||||
ResourceDict = str
|
||||
ResourceDict = Dict[str, Number]
|
||||
|
||||
# e.g., IP address of the node.
|
||||
NodeID = str
|
||||
@@ -43,15 +46,19 @@ class ResourceDemandScheduler:
|
||||
def get_nodes_to_launch(
|
||||
self, nodes: List[NodeID], pending_nodes: Dict[NodeType, int],
|
||||
resource_demands: List[ResourceDict],
|
||||
usage_by_ip: Dict[str, ResourceDict]) -> Dict[NodeType, int]:
|
||||
usage_by_ip: Dict[str, ResourceDict],
|
||||
pending_placement_groups: List[PlacementGroupTableData]
|
||||
) -> Dict[NodeType, int]:
|
||||
"""Given resource demands, return node types to add to the cluster.
|
||||
|
||||
This method:
|
||||
(1) calculates the resources present in the cluster.
|
||||
(2) calculates the remaining nodes to add to respect min_workers
|
||||
constraint per node type.
|
||||
(3) calculates the unfulfilled resource bundles.
|
||||
(4) calculates which nodes need to be launched to fulfill all
|
||||
constraint per node type.
|
||||
(3) for each strict spread placement group, reserve space on
|
||||
available nodes and launch new nodes if necessary.
|
||||
(4) calculates the unfulfilled resource bundles.
|
||||
(5) calculates which nodes need to be launched to fulfill all
|
||||
the bundle requests, subject to max_worker constraints.
|
||||
|
||||
Args:
|
||||
@@ -61,31 +68,44 @@ class ResourceDemandScheduler:
|
||||
usage_by_ip: Mapping from ip to available resources.
|
||||
"""
|
||||
|
||||
node_resources: List[ResourceDict]
|
||||
node_type_counts: Dict[NodeType, int]
|
||||
node_resources, node_type_counts = \
|
||||
self.calculate_node_resources(nodes, pending_nodes, usage_by_ip)
|
||||
|
||||
logger.info("Cluster resources: {}".format(node_resources))
|
||||
logger.info("Node counts: {}".format(node_type_counts))
|
||||
# Step 2: add nodes to add to satisfy min_workers for each type
|
||||
node_resources, node_type_counts, min_workers_nodes_to_add = \
|
||||
_add_min_workers_nodes(
|
||||
node_resources, node_type_counts, self.node_types)
|
||||
|
||||
nodes_to_add_based_on_demand = {}
|
||||
if resource_demands is not None:
|
||||
logger.info("Cluster resources: {}".format(node_resources))
|
||||
logger.info("Node counts: {}".format(node_type_counts))
|
||||
unfulfilled = get_bin_pack_residual(node_resources,
|
||||
resource_demands)
|
||||
logger.info("Resource demands: {}".format(resource_demands))
|
||||
logger.info("Unfulfilled demands: {}".format(unfulfilled))
|
||||
max_to_add = self.max_workers - sum(node_type_counts.values())
|
||||
nodes_to_add_based_on_demand = get_nodes_for(
|
||||
self.node_types, node_type_counts, max_to_add, unfulfilled)
|
||||
# Step 3: add nodes for strict spread groups
|
||||
logger.info(f"Placement group demands: {pending_placement_groups}")
|
||||
placement_group_demand_vector, strict_spreads = \
|
||||
placement_groups_to_resource_demands(pending_placement_groups)
|
||||
resource_demands.extend(placement_group_demand_vector)
|
||||
placement_group_nodes_to_add, node_resources, node_type_counts = \
|
||||
self.reserve_and_allocate_spread(
|
||||
strict_spreads, node_resources, node_type_counts)
|
||||
|
||||
# Step 4/5: add nodes for pending tasks, actors, and non-strict spread
|
||||
# groups
|
||||
unfulfilled, _ = get_bin_pack_residual(node_resources,
|
||||
resource_demands)
|
||||
logger.info("Resource demands: {}".format(resource_demands))
|
||||
logger.info("Unfulfilled demands: {}".format(unfulfilled))
|
||||
max_to_add = self.max_workers - sum(node_type_counts.values())
|
||||
nodes_to_add_based_on_demand = get_nodes_for(
|
||||
self.node_types, node_type_counts, max_to_add, unfulfilled)
|
||||
# Merge nodes to add based on demand and nodes to add based on
|
||||
# min_workers constraint. We add them because nodes to add based on
|
||||
# demand was calculated after the min_workers constraint was respected.
|
||||
total_nodes_to_add = {}
|
||||
for node_type in self.node_types:
|
||||
nodes_to_add = nodes_to_add_based_on_demand.get(node_type, 0) + \
|
||||
min_workers_nodes_to_add.get(node_type, 0)
|
||||
nodes_to_add = (min_workers_nodes_to_add.get(
|
||||
node_type, 0) + placement_group_nodes_to_add.get(node_type, 0)
|
||||
+ nodes_to_add_based_on_demand.get(node_type, 0))
|
||||
if nodes_to_add > 0:
|
||||
total_nodes_to_add[node_type] = nodes_to_add
|
||||
|
||||
@@ -220,6 +240,53 @@ class ResourceDemandScheduler:
|
||||
|
||||
return node_resources, node_type_counts
|
||||
|
||||
def reserve_and_allocate_spread(self,
|
||||
strict_spreads: List[List[ResourceDict]],
|
||||
node_resources: List[ResourceDict],
|
||||
node_type_counts: Dict[NodeType, int]):
|
||||
"""For each strict spread, attempt to reserve as much space as possible
|
||||
on the node, then allocate new nodes for the unfulfilled portion.
|
||||
|
||||
Args:
|
||||
strict_spreads (List[List[ResourceDict]]): A list of placement
|
||||
groups which must be spread out.
|
||||
node_resources (List[ResourceDict]): Available node resources in
|
||||
the cluster.
|
||||
node_type_counts (Dict[NodeType, int]): The amount of each type of
|
||||
node pending or in the cluster.
|
||||
|
||||
Returns:
|
||||
Dict[NodeType, int]: Nodes to add.
|
||||
List[ResourceDict]: The updated node_resources after the method.
|
||||
Dict[NodeType, int]: The updated node_type_counts.
|
||||
|
||||
"""
|
||||
to_add = collections.defaultdict(int)
|
||||
for bundles in strict_spreads:
|
||||
# Try to pack as many bundles of this group as possible on existing
|
||||
# nodes. The remaining will be allocated on new nodes.
|
||||
unfulfilled, node_resources = get_bin_pack_residual(
|
||||
node_resources, bundles, strict_spread=True)
|
||||
max_to_add = self.max_workers - sum(node_type_counts.values())
|
||||
# Allocate new nodes for the remaining bundles that don't fit.
|
||||
to_launch = get_nodes_for(
|
||||
self.node_types,
|
||||
node_type_counts,
|
||||
max_to_add,
|
||||
unfulfilled,
|
||||
strict_spread=True)
|
||||
_inplace_add(node_type_counts, to_launch)
|
||||
_inplace_add(to_add, to_launch)
|
||||
new_node_resources = _node_type_counts_to_node_resources(
|
||||
self.node_types, to_launch)
|
||||
# Update node resources to include newly launched nodes and their
|
||||
# bundles.
|
||||
unfulfilled, including_reserved = get_bin_pack_residual(
|
||||
new_node_resources, unfulfilled, strict_spread=True)
|
||||
assert not unfulfilled
|
||||
node_resources += including_reserved
|
||||
return to_add, node_resources, node_type_counts
|
||||
|
||||
def debug_string(self, nodes: List[NodeID],
|
||||
pending_nodes: Dict[NodeID, int],
|
||||
usage_by_ip: Dict[str, ResourceDict]) -> str:
|
||||
@@ -235,6 +302,19 @@ class ResourceDemandScheduler:
|
||||
return out
|
||||
|
||||
|
||||
def _node_type_counts_to_node_resources(
|
||||
node_types: Dict[NodeType, NodeTypeConfigDict],
|
||||
node_type_counts: Dict[NodeType, int]) -> List[ResourceDict]:
|
||||
"""Converts a node_type_counts dict into a list of node_resources."""
|
||||
resources = []
|
||||
for node_type, count in node_type_counts.items():
|
||||
# Be careful, each entry in the list must be deep copied!
|
||||
resources += [
|
||||
node_types[node_type]["resources"].copy() for _ in range(count)
|
||||
]
|
||||
return resources
|
||||
|
||||
|
||||
def _add_min_workers_nodes(
|
||||
node_resources: List[ResourceDict],
|
||||
node_type_counts: Dict[NodeType, int],
|
||||
@@ -269,8 +349,10 @@ def _add_min_workers_nodes(
|
||||
|
||||
|
||||
def get_nodes_for(node_types: Dict[NodeType, NodeTypeConfigDict],
|
||||
existing_nodes: Dict[NodeType, int], max_to_add: int,
|
||||
resources: List[ResourceDict]) -> Dict[NodeType, int]:
|
||||
existing_nodes: Dict[NodeType, int],
|
||||
max_to_add: int,
|
||||
resources: List[ResourceDict],
|
||||
strict_spread: bool = False) -> Dict[NodeType, int]:
|
||||
"""Determine nodes to add given resource demands and constraints.
|
||||
|
||||
Args:
|
||||
@@ -279,12 +361,14 @@ def get_nodes_for(node_types: Dict[NodeType, NodeTypeConfigDict],
|
||||
This sets constraints on the number of new nodes to add.
|
||||
max_to_add: global constraint on nodes to add.
|
||||
resources: resource demands to fulfill.
|
||||
strict_spread: If true, each element in `resources` must be placed on a
|
||||
different node.
|
||||
|
||||
Returns:
|
||||
Dict of count to add for each node type.
|
||||
|
||||
"""
|
||||
nodes_to_add = collections.defaultdict(int)
|
||||
allocated_resources = []
|
||||
|
||||
while resources and sum(nodes_to_add.values()) < max_to_add:
|
||||
utilization_scores = []
|
||||
@@ -293,12 +377,21 @@ def get_nodes_for(node_types: Dict[NodeType, NodeTypeConfigDict],
|
||||
node_type, 0) >= node_types[node_type]["max_workers"]):
|
||||
continue
|
||||
node_resources = node_types[node_type]["resources"]
|
||||
score = _utilization_score(node_resources, resources)
|
||||
if strict_spread:
|
||||
# If handling strict spread, only one bundle can be placed on
|
||||
# the node.
|
||||
score = _utilization_score(node_resources, [resources[0]])
|
||||
else:
|
||||
score = _utilization_score(node_resources, resources)
|
||||
if score is not None:
|
||||
utilization_scores.append((score, node_type))
|
||||
|
||||
# Give up, no feasible node.
|
||||
if not utilization_scores:
|
||||
# TODO (Alex): We will hit this case every time a placement group
|
||||
# starts up because placement groups are scheduled via custom
|
||||
# resources. This will behave properly with the current utilization
|
||||
# score heuristic, but it's a little dangerous and misleading.
|
||||
logger.info(
|
||||
"No feasible node type to add for {}".format(resources))
|
||||
break
|
||||
@@ -306,10 +399,14 @@ def get_nodes_for(node_types: Dict[NodeType, NodeTypeConfigDict],
|
||||
utilization_scores = sorted(utilization_scores, reverse=True)
|
||||
best_node_type = utilization_scores[0][1]
|
||||
nodes_to_add[best_node_type] += 1
|
||||
allocated_resources.append(node_types[best_node_type]["resources"])
|
||||
residual = get_bin_pack_residual(allocated_resources[-1:], resources)
|
||||
assert len(residual) < len(resources), (resources, residual)
|
||||
resources = residual
|
||||
if strict_spread:
|
||||
resources = resources[1:]
|
||||
else:
|
||||
allocated_resource = node_types[best_node_type]["resources"]
|
||||
residual, _ = get_bin_pack_residual([allocated_resource],
|
||||
resources)
|
||||
assert len(residual) < len(resources), (resources, residual)
|
||||
resources = residual
|
||||
|
||||
return nodes_to_add
|
||||
|
||||
@@ -336,9 +433,9 @@ def _utilization_score(node_resources: ResourceDict,
|
||||
return (min(util_by_resources), np.mean(util_by_resources))
|
||||
|
||||
|
||||
def get_bin_pack_residual(
|
||||
node_resources: List[ResourceDict],
|
||||
resource_demands: List[ResourceDict]) -> List[ResourceDict]:
|
||||
def get_bin_pack_residual(node_resources: List[ResourceDict],
|
||||
resource_demands: List[ResourceDict],
|
||||
strict_spread: bool = False) -> List[ResourceDict]:
|
||||
"""Return a subset of resource_demands that cannot fit in the cluster.
|
||||
|
||||
TODO(ekl): this currently does not guarantee the resources will be packed
|
||||
@@ -349,26 +446,37 @@ def get_bin_pack_residual(
|
||||
node_resources (List[ResourceDict]): List of resources per node.
|
||||
resource_demands (List[ResourceDict]): List of resource bundles that
|
||||
need to be bin packed onto the nodes.
|
||||
strict_spread (bool): If true, each element in resource_demands must be
|
||||
placed on a different entry in `node_resources`.
|
||||
|
||||
Returns:
|
||||
List[ResourceDict] the residual list resources that do not fit.
|
||||
|
||||
"""
|
||||
|
||||
unfulfilled = []
|
||||
|
||||
# A most naive bin packing algorithm.
|
||||
nodes = copy.deepcopy(node_resources)
|
||||
used = []
|
||||
for demand in resource_demands:
|
||||
found = False
|
||||
for node in nodes:
|
||||
node = None
|
||||
for i in range(len(nodes)):
|
||||
node = nodes[i]
|
||||
if _fits(node, demand):
|
||||
_inplace_subtract(node, demand)
|
||||
found = True
|
||||
# In the strict_spread case, we can't reuse nodes.
|
||||
if strict_spread:
|
||||
used.append(node)
|
||||
del nodes[i]
|
||||
break
|
||||
if not found:
|
||||
if found and node:
|
||||
_inplace_subtract(node, demand)
|
||||
else:
|
||||
unfulfilled.append(demand)
|
||||
|
||||
return unfulfilled
|
||||
return unfulfilled, nodes + used
|
||||
|
||||
|
||||
def _fits(node: ResourceDict, resources: ResourceDict) -> bool:
|
||||
@@ -383,3 +491,54 @@ def _inplace_subtract(node: ResourceDict, resources: ResourceDict) -> None:
|
||||
assert k in node, (k, node)
|
||||
node[k] -= v
|
||||
assert node[k] >= 0.0, (node, k, v)
|
||||
|
||||
|
||||
def _inplace_add(a: collections.defaultdict, b: Dict) -> None:
|
||||
"""Generically adds values in `b` to `a`.
|
||||
a[k] should be defined for all k in b.keys()"""
|
||||
for k, v in b.items():
|
||||
a[k] += v
|
||||
|
||||
|
||||
def placement_groups_to_resource_demands(
|
||||
pending_placement_groups: List[PlacementGroupTableData]):
|
||||
"""Preprocess placement group requests into regular resource demand vectors
|
||||
when possible. The policy is:
|
||||
* STRICT_PACK - Convert to a single bundle.
|
||||
* PACK - Flatten into a resource demand vector.
|
||||
* STRICT_SPREAD - Cannot be converted.
|
||||
* SPREAD - Flatten into a resource demand vector.
|
||||
|
||||
Args:
|
||||
pending_placement_groups (List[PlacementGroupData]): List of
|
||||
PlacementGroupLoad's.
|
||||
|
||||
Returns:
|
||||
List[ResourceDict]: The placement groups which were converted to a
|
||||
resource demand vector.
|
||||
List[List[ResourceDict]]: The placement groups which should be strictly
|
||||
spread.
|
||||
"""
|
||||
resource_demand_vector = []
|
||||
unconverted = []
|
||||
for placement_group in pending_placement_groups:
|
||||
shapes = [
|
||||
dict(bundle.unit_resources) for bundle in placement_group.bundles
|
||||
]
|
||||
if (placement_group.strategy == PlacementStrategy.PACK
|
||||
or placement_group.strategy == PlacementStrategy.SPREAD):
|
||||
resource_demand_vector.extend(shapes)
|
||||
elif placement_group.strategy == PlacementStrategy.STRICT_PACK:
|
||||
combined = collections.defaultdict(float)
|
||||
for shape in shapes:
|
||||
for label, quantity in shape.items():
|
||||
combined[label] += quantity
|
||||
resource_demand_vector.append(combined)
|
||||
elif (placement_group.strategy == PlacementStrategy.STRICT_SPREAD):
|
||||
unconverted.append(shapes)
|
||||
else:
|
||||
logger.error(
|
||||
f"Unknown placement group request type: {placement_group}. "
|
||||
f"Please file a bug report "
|
||||
f"https://github.com/ray-project/ray/issues/new.")
|
||||
return resource_demand_vector, unconverted
|
||||
|
||||
@@ -143,6 +143,9 @@ class Monitor:
|
||||
waiting_bundles, infeasible_bundles = \
|
||||
parse_resource_demands(message.resource_load_by_shape)
|
||||
|
||||
pending_placement_groups = list(
|
||||
message.placement_group_load.placement_group_data)
|
||||
|
||||
# Update the load metrics for this raylet.
|
||||
client_id = ray.utils.binary_to_hex(heartbeat_message.client_id)
|
||||
ip = self.raylet_id_to_ip_map.get(client_id)
|
||||
@@ -154,7 +157,8 @@ class Monitor:
|
||||
self.load_metrics.update(
|
||||
ip, total_resources, update_available_resources,
|
||||
available_resources, update_resource_load, resource_load,
|
||||
waiting_bundles, infeasible_bundles)
|
||||
waiting_bundles, infeasible_bundles,
|
||||
pending_placement_groups)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Monitor: could not find ip for client {client_id}")
|
||||
|
||||
@@ -17,6 +17,8 @@ from ray.autoscaler._private.commands import get_or_create_head_node
|
||||
from ray.autoscaler._private.resource_demand_scheduler import \
|
||||
_utilization_score, _add_min_workers_nodes, \
|
||||
get_bin_pack_residual, get_nodes_for, ResourceDemandScheduler
|
||||
from ray.gcs_utils import PlacementGroupTableData
|
||||
from ray.core.generated.common_pb2 import Bundle, PlacementStrategy
|
||||
from ray.autoscaler.tags import TAG_RAY_USER_NODE_TYPE, TAG_RAY_NODE_KIND, \
|
||||
NODE_KIND_WORKER, TAG_RAY_NODE_STATUS, \
|
||||
STATUS_UP_TO_DATE, STATUS_UNINITIALIZED
|
||||
@@ -94,15 +96,44 @@ def test_util_score():
|
||||
|
||||
|
||||
def test_bin_pack():
|
||||
assert get_bin_pack_residual([], [{"GPU": 2}, {"GPU": 2}]) == \
|
||||
assert get_bin_pack_residual([], [{"GPU": 2}, {"GPU": 2}])[0] == \
|
||||
[{"GPU": 2}, {"GPU": 2}]
|
||||
assert get_bin_pack_residual([{"GPU": 2}], [{"GPU": 2}, {"GPU": 2}]) == \
|
||||
[{"GPU": 2}]
|
||||
assert get_bin_pack_residual([{"GPU": 4}], [{"GPU": 2}, {"GPU": 2}]) == []
|
||||
assert get_bin_pack_residual([{"GPU": 2}], [{"GPU": 2}, {"GPU": 2}])[0] \
|
||||
== [{"GPU": 2}]
|
||||
assert get_bin_pack_residual([{
|
||||
"GPU": 4
|
||||
}], [{
|
||||
"GPU": 2
|
||||
}, {
|
||||
"GPU": 2
|
||||
}])[0] == []
|
||||
arg = [{"GPU": 2}, {"GPU": 2, "CPU": 2}]
|
||||
assert get_bin_pack_residual(arg, [{"GPU": 2}, {"GPU": 2}]) == []
|
||||
assert get_bin_pack_residual(arg, [{"GPU": 2}, {"GPU": 2}])[0] == []
|
||||
arg = [{"CPU": 2}, {"GPU": 2}]
|
||||
assert get_bin_pack_residual(arg, [{"GPU": 2}, {"GPU": 2}]) == [{"GPU": 2}]
|
||||
assert get_bin_pack_residual(arg, [{
|
||||
"GPU": 2
|
||||
}, {
|
||||
"GPU": 2
|
||||
}])[0] == [{
|
||||
"GPU": 2
|
||||
}]
|
||||
arg = [{"GPU": 3}]
|
||||
assert get_bin_pack_residual(
|
||||
arg, [{
|
||||
"GPU": 1
|
||||
}, {
|
||||
"GPU": 1
|
||||
}], strict_spread=False)[0] == []
|
||||
assert get_bin_pack_residual(
|
||||
arg, [{
|
||||
"GPU": 1
|
||||
}, {
|
||||
"GPU": 1
|
||||
}], strict_spread=True) == ([{
|
||||
"GPU": 1
|
||||
}], [{
|
||||
"GPU": 2
|
||||
}])
|
||||
|
||||
|
||||
def test_get_nodes_packing_heuristic():
|
||||
@@ -135,6 +166,18 @@ def test_get_nodes_packing_heuristic():
|
||||
assert get_nodes_for(
|
||||
TYPES_A, {}, 9999, ([{"GPU": 1}] * 8) + ([{"CPU": 1}] * 64)) == \
|
||||
{"m4.16xlarge": 1, "p2.8xlarge": 1}
|
||||
assert get_nodes_for(
|
||||
TYPES_A, {}, 9999, [{
|
||||
"GPU": 1
|
||||
}] * 8, strict_spread=False) == {
|
||||
"p2.8xlarge": 1
|
||||
}
|
||||
assert get_nodes_for(
|
||||
TYPES_A, {}, 9999, [{
|
||||
"GPU": 1
|
||||
}] * 8, strict_spread=True) == {
|
||||
"p2.xlarge": 8
|
||||
}
|
||||
|
||||
|
||||
def test_get_nodes_respects_max_limit():
|
||||
@@ -248,7 +291,7 @@ def test_get_nodes_to_launch_with_min_workers():
|
||||
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, {}, [{
|
||||
"GPU": 8
|
||||
}], utilizations)
|
||||
}], utilizations, [])
|
||||
assert to_launch == {"p2.8xlarge": 1}
|
||||
|
||||
|
||||
@@ -270,7 +313,7 @@ def test_get_nodes_to_launch_with_min_workers_and_bin_packing():
|
||||
# requires 2 p2.8xls (only 2 are in cluster/pending) and 1 p2.xlarge
|
||||
demands = [{"GPU": 8}] * (len(utilizations) + 1) + [{"GPU": 1}]
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, pending_nodes, demands,
|
||||
utilizations)
|
||||
utilizations, [])
|
||||
assert to_launch == {"p2.xlarge": 1}
|
||||
|
||||
# 3 min_workers of p2.8xlarge covers the 2 p2.8xlarge + 1 p2.xlarge demand.
|
||||
@@ -279,7 +322,7 @@ def test_get_nodes_to_launch_with_min_workers_and_bin_packing():
|
||||
new_types["p2.8xlarge"]["min_workers"] = 3
|
||||
scheduler = ResourceDemandScheduler(provider, new_types, 10)
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, pending_nodes, demands,
|
||||
utilizations)
|
||||
utilizations, [])
|
||||
# Make sure it does not return [("p2.8xlarge", 1), ("p2.xlarge", 1)]
|
||||
assert to_launch == {"p2.8xlarge": 1}
|
||||
|
||||
@@ -297,7 +340,7 @@ def test_get_nodes_to_launch_limits():
|
||||
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, {"p2.8xlarge": 1}, [{
|
||||
"GPU": 8
|
||||
}] * 2, utilizations)
|
||||
}] * 2, utilizations, [])
|
||||
assert to_launch == {}
|
||||
|
||||
|
||||
@@ -317,11 +360,101 @@ def test_calculate_node_resources():
|
||||
# requires 4 p2.8xls (only 3 are in cluster/pending)
|
||||
demands = [{"GPU": 8}] * (len(utilizations) + 2)
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, pending_nodes, demands,
|
||||
utilizations)
|
||||
utilizations, [])
|
||||
|
||||
assert to_launch == {"p2.8xlarge": 1}
|
||||
|
||||
|
||||
class TestPlacementGroupScaling:
|
||||
def test_strategies(self):
|
||||
provider = MockProvider()
|
||||
scheduler = ResourceDemandScheduler(provider, TYPES_A, 10)
|
||||
|
||||
provider.create_node({}, {TAG_RAY_USER_NODE_TYPE: "p2.8xlarge"}, 2)
|
||||
# At this point our cluster has 2 p2.8xlarge instances (16 GPUs) and is
|
||||
# fully idle.
|
||||
nodes = provider.non_terminated_nodes({})
|
||||
|
||||
resource_demands = [{"GPU": 4}] * 2
|
||||
pending_placement_groups = [
|
||||
# Requires a new node (only uses 2 GPUs on it though).
|
||||
PlacementGroupTableData(
|
||||
state=PlacementGroupTableData.PENDING,
|
||||
strategy=PlacementStrategy.STRICT_SPREAD,
|
||||
bundles=[
|
||||
Bundle(unit_resources={"GPU": 2}),
|
||||
Bundle(unit_resources={"GPU": 2}),
|
||||
Bundle(unit_resources={"GPU": 2})
|
||||
]),
|
||||
# Requires a new node (uses the whole node).
|
||||
PlacementGroupTableData(
|
||||
state=PlacementGroupTableData.PENDING,
|
||||
strategy=PlacementStrategy.STRICT_PACK,
|
||||
bundles=([Bundle(unit_resources={"GPU": 2})] * 4)),
|
||||
# Fits across the machines that strict spread.
|
||||
PlacementGroupTableData(
|
||||
# runs on.
|
||||
state=PlacementGroupTableData.PENDING,
|
||||
strategy=PlacementStrategy.PACK,
|
||||
bundles=([Bundle(unit_resources={"GPU": 2})] * 2)),
|
||||
# Fits across the machines that strict spread.
|
||||
PlacementGroupTableData(
|
||||
# runs on.
|
||||
state=PlacementGroupTableData.PENDING,
|
||||
strategy=PlacementStrategy.SPREAD,
|
||||
bundles=([Bundle(unit_resources={"GPU": 2})] * 2)),
|
||||
]
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, {}, resource_demands,
|
||||
{}, pending_placement_groups)
|
||||
assert to_launch == {"p2.8xlarge": 2}
|
||||
|
||||
def test_many_strict_spreads(self):
|
||||
provider = MockProvider()
|
||||
scheduler = ResourceDemandScheduler(provider, TYPES_A, 10)
|
||||
|
||||
provider.create_node({}, {TAG_RAY_USER_NODE_TYPE: "p2.8xlarge"}, 2)
|
||||
# At this point our cluster has 2 p2.8xlarge instances (16 GPUs) and is
|
||||
# fully idle.
|
||||
nodes = provider.non_terminated_nodes({})
|
||||
|
||||
resource_demands = [{"GPU": 1}] * 6
|
||||
pending_placement_groups = [
|
||||
# Requires a new node (only uses 2 GPUs on it though).
|
||||
PlacementGroupTableData(
|
||||
state=PlacementGroupTableData.PENDING,
|
||||
strategy=PlacementStrategy.STRICT_SPREAD,
|
||||
bundles=[Bundle(unit_resources={"GPU": 2})] * 3),
|
||||
]
|
||||
# Each placement group will take up 2 GPUs per node, but the distinct
|
||||
# placement groups should still reuse the same nodes.
|
||||
pending_placement_groups = pending_placement_groups * 3
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, {}, resource_demands,
|
||||
{}, pending_placement_groups)
|
||||
assert to_launch == {"p2.8xlarge": 1}
|
||||
|
||||
def test_packing(self):
|
||||
provider = MockProvider()
|
||||
scheduler = ResourceDemandScheduler(provider, TYPES_A, 10)
|
||||
|
||||
provider.create_node({}, {TAG_RAY_USER_NODE_TYPE: "p2.8xlarge"}, 1)
|
||||
# At this point our cluster has 1 p2.8xlarge instances (8 GPUs) and is
|
||||
# fully idle.
|
||||
nodes = provider.non_terminated_nodes({})
|
||||
|
||||
resource_demands = [{"GPU": 1}] * 2
|
||||
pending_placement_groups = [
|
||||
PlacementGroupTableData(
|
||||
state=PlacementGroupTableData.PENDING,
|
||||
strategy=PlacementStrategy.STRICT_PACK,
|
||||
bundles=[Bundle(unit_resources={"GPU": 2})] * 3),
|
||||
]
|
||||
# The 2 resource demand gpus should still be packed onto the same node
|
||||
# as the 6 GPU placement group.
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, {}, resource_demands,
|
||||
{}, pending_placement_groups)
|
||||
assert to_launch == {}
|
||||
|
||||
|
||||
def test_get_concurrent_resource_demand_to_launch():
|
||||
node_types = copy.deepcopy(TYPES_A)
|
||||
node_types["p2.8xlarge"]["min_workers"] = 1
|
||||
@@ -441,7 +574,7 @@ def test_get_nodes_to_launch_max_launch_concurrency():
|
||||
|
||||
scheduler = ResourceDemandScheduler(provider, new_types, 30)
|
||||
|
||||
to_launch = scheduler.get_nodes_to_launch([], {}, [], [])
|
||||
to_launch = scheduler.get_nodes_to_launch([], {}, [], [], [])
|
||||
# Respects min_workers despite concurrency limitation.
|
||||
assert to_launch == {"p2.8xlarge": 4}
|
||||
|
||||
@@ -456,7 +589,7 @@ def test_get_nodes_to_launch_max_launch_concurrency():
|
||||
# requires 41 p2.8xls (currently 1 pending, 1 launching, 0 running}
|
||||
demands = [{"GPU": 8}] * (len(utilizations) + 40)
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, launching_nodes, demands,
|
||||
utilizations)
|
||||
utilizations, [])
|
||||
# Enforces max launch to 5 when < 5 running. 2 are pending/launching.
|
||||
assert to_launch == {"p2.8xlarge": 3}
|
||||
|
||||
@@ -471,7 +604,7 @@ def test_get_nodes_to_launch_max_launch_concurrency():
|
||||
# requires 17 p2.8xls (currently 1 pending, 1 launching, 8 running}
|
||||
demands = [{"GPU": 8}] * (len(utilizations) + 15)
|
||||
to_launch = scheduler.get_nodes_to_launch(nodes, launching_nodes, demands,
|
||||
utilizations)
|
||||
utilizations, [])
|
||||
# We are allowed to launch up to 8 more since 8 are running.
|
||||
# We already have 2 pending/launching, so only 6 remain.
|
||||
assert to_launch == {"p2.8xlarge": 6}
|
||||
@@ -496,6 +629,25 @@ class LoadMetricsTest(unittest.TestCase):
|
||||
"GPU": 1
|
||||
}])
|
||||
|
||||
def testPlacementGroupLoad(self):
|
||||
lm = LoadMetrics()
|
||||
pending_placement_groups = [
|
||||
PlacementGroupTableData(
|
||||
state=PlacementGroupTableData.RESCHEDULING,
|
||||
strategy=PlacementStrategy.PACK,
|
||||
bundles=([Bundle(unit_resources={"GPU": 2})] * 2)),
|
||||
PlacementGroupTableData(
|
||||
state=PlacementGroupTableData.RESCHEDULING,
|
||||
strategy=PlacementStrategy.SPREAD,
|
||||
bundles=([Bundle(unit_resources={"GPU": 2})] * 2)),
|
||||
]
|
||||
lm.update(
|
||||
"1.1.1.1", {},
|
||||
True, {},
|
||||
True, {},
|
||||
pending_placement_groups=pending_placement_groups)
|
||||
assert lm.get_pending_placement_groups() == pending_placement_groups
|
||||
|
||||
|
||||
class AutoscalingTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
@@ -577,6 +729,75 @@ class AutoscalingTest(unittest.TestCase):
|
||||
autoscaler.update()
|
||||
self.waitForNodes(2)
|
||||
|
||||
def testPlacementGroup(self):
|
||||
# Note this is mostly an integration test. See
|
||||
# testPlacementGroupScaling for more comprehensive tests.
|
||||
config = copy.deepcopy(MULTI_WORKER_CLUSTER)
|
||||
config["min_workers"] = 0
|
||||
config["max_workers"] = 999
|
||||
config_path = self.write_config(config)
|
||||
self.provider = MockProvider()
|
||||
runner = MockProcessRunner()
|
||||
lm = LoadMetrics()
|
||||
autoscaler = StandardAutoscaler(
|
||||
config_path,
|
||||
lm,
|
||||
max_failures=0,
|
||||
process_runner=runner,
|
||||
update_interval_s=0)
|
||||
self.provider.create_node({}, {
|
||||
TAG_RAY_NODE_KIND: "head",
|
||||
TAG_RAY_USER_NODE_TYPE: "m4.4xlarge"
|
||||
}, 1)
|
||||
head_ip = self.provider.non_terminated_node_ips({})[0]
|
||||
assert len(self.provider.non_terminated_nodes({})) == 1
|
||||
autoscaler.update()
|
||||
self.waitForNodes(1)
|
||||
|
||||
pending_placement_groups = [
|
||||
PlacementGroupTableData(
|
||||
state=PlacementGroupTableData.RESCHEDULING,
|
||||
strategy=PlacementStrategy.STRICT_SPREAD,
|
||||
bundles=[Bundle(unit_resources={"GPU": 2})] * 3),
|
||||
PlacementGroupTableData(
|
||||
state=PlacementGroupTableData.RESCHEDULING,
|
||||
strategy=PlacementStrategy.PACK,
|
||||
bundles=([Bundle(unit_resources={"GPU": 2})] * 5)),
|
||||
]
|
||||
# Since placement groups are implemented with custom resources, this is
|
||||
# an example of the accompanying resource demands. Note the resource
|
||||
# demand autoscaler will be unable to fulfill these demands, but we
|
||||
# should still handle the other infeasible/waiting bundles.
|
||||
placement_group_resource_demands = [{
|
||||
"GPU_group_0_6c2506ac733bc37496295b02c4fad446": 0.0101,
|
||||
"GPU_group_6c2506ac733bc37496295b02c4fad446": 0.0101
|
||||
}]
|
||||
lm.update(
|
||||
head_ip, {"CPU": 16},
|
||||
True, {"CPU": 16},
|
||||
False, {},
|
||||
infeasible_bundles=placement_group_resource_demands,
|
||||
waiting_bundles=[{
|
||||
"GPU": 8
|
||||
}],
|
||||
pending_placement_groups=pending_placement_groups)
|
||||
autoscaler.update()
|
||||
self.waitForNodes(5)
|
||||
|
||||
for i in range(1, 5):
|
||||
assert self.provider.mock_nodes[i].node_type == "p2.8xlarge"
|
||||
|
||||
pending_placement_groups = [
|
||||
PlacementGroupTableData(
|
||||
state=PlacementGroupTableData.RESCHEDULING,
|
||||
strategy=PlacementStrategy.STRICT_PACK,
|
||||
bundles=([Bundle(unit_resources={"GPU": 2})] * 4)),
|
||||
PlacementGroupTableData(
|
||||
state=PlacementGroupTableData.RESCHEDULING,
|
||||
strategy=PlacementStrategy.SPREAD,
|
||||
bundles=([Bundle(unit_resources={"GPU": 2})] * 2)),
|
||||
]
|
||||
|
||||
def testScaleUpMinWorkers(self):
|
||||
config = copy.deepcopy(MULTI_WORKER_CLUSTER)
|
||||
config["min_workers"] = 2
|
||||
|
||||
Reference in New Issue
Block a user