[autoscaler] Support min_workers for multi node type (#11041)

* prepare for head node

* move command runner interface outside _private

* remove space

* Eric

* flake

* min_workers in multi node type

* fixing edge cases

* eric not idle

* fix target_workers to consider min_workers of node types

* idle timeout

* minor

* minor fix

* test

* lint

* eric v2

* eric 3

* min_workers constraint before bin packing

* Update resource_demand_scheduler.py

* Revert "Update resource_demand_scheduler.py"

This reverts commit 818a63a2c86d8437b3ef21c5035d701c1d1127b5.

* reducing diff

Co-authored-by: Ameer Haj Ali <ameerhajali@ameers-mbp.lan>
Co-authored-by: Alex Wu <alex@anyscale.io>
Co-authored-by: Alex Wu <itswu.alex@gmail.com>
This commit is contained in:
Ameer Haj Ali
2020-09-28 22:02:01 -07:00
committed by GitHub
parent 0a6164ab15
commit 0d36e4c025
6 changed files with 286 additions and 36 deletions
@@ -4,6 +4,7 @@ import yaml
import tempfile
import shutil
import unittest
import copy
import ray
from ray.tests.test_autoscaler import SMALL_CLUSTER, MockProvider, \
@@ -13,9 +14,10 @@ from ray.autoscaler._private.autoscaler import StandardAutoscaler
from ray.autoscaler._private.load_metrics import LoadMetrics
from ray.autoscaler._private.commands import get_or_create_head_node
from ray.autoscaler._private.resource_demand_scheduler import \
_utilization_score, \
_utilization_score, _add_min_workers_nodes, \
get_bin_pack_residual, get_nodes_for, ResourceDemandScheduler
from ray.autoscaler.tags import TAG_RAY_USER_NODE_TYPE, TAG_RAY_NODE_KIND
from ray.autoscaler.tags import TAG_RAY_USER_NODE_TYPE, TAG_RAY_NODE_KIND, \
NODE_KIND_WORKER
from ray.test_utils import same_elements
from time import sleep
@@ -164,6 +166,116 @@ def test_get_nodes_respects_max_limit():
}] * 10) == [("m4.large", 2)]
def test_add_min_workers_nodes():
types = {
"m2.large": {
"resources": {
"CPU": 2
},
"min_workers": 50,
"max_workers": 100,
},
"m4.large": {
"resources": {
"CPU": 2
},
"min_workers": 0,
"max_workers": 10,
},
"gpu": {
"resources": {
"GPU": 1
},
"min_workers": 99999,
"max_workers": 99999,
},
}
assert _add_min_workers_nodes([],
{},
types) == \
([{"CPU": 2}]*50+[{"GPU": 1}]*99999, {"m2.large": 50, "gpu": 99999},
[("m2.large", 50), ("gpu", 99999)])
assert _add_min_workers_nodes([{"CPU": 2}]*5,
{"m2.large": 5},
types) == \
([{"CPU": 2}]*50+[{"GPU": 1}]*99999, {"m2.large": 50, "gpu": 99999},
[("m2.large", 45), ("gpu", 99999)])
assert _add_min_workers_nodes([{"CPU": 2}]*60,
{"m2.large": 60},
types) == \
([{"CPU": 2}]*60+[{"GPU": 1}]*99999, {"m2.large": 60, "gpu": 99999},
[("gpu", 99999)])
assert _add_min_workers_nodes([{
"CPU": 2
}] * 50 + [{
"GPU": 1
}] * 99999, {
"m2.large": 50,
"gpu": 99999
}, types) == ([{
"CPU": 2
}] * 50 + [{
"GPU": 1
}] * 99999, {
"m2.large": 50,
"gpu": 99999
}, [])
def test_get_nodes_to_launch_with_min_workers():
provider = MockProvider()
new_types = copy.deepcopy(TYPES_A)
new_types["p2.8xlarge"]["min_workers"] = 2
scheduler = ResourceDemandScheduler(provider, new_types, 3)
provider.create_node({}, {TAG_RAY_USER_NODE_TYPE: "p2.8xlarge"}, 1)
nodes = provider.non_terminated_nodes({})
ips = provider.non_terminated_node_ips({})
utilizations = {ip: {"GPU": 8} for ip in ips}
to_launch = scheduler.get_nodes_to_launch(nodes, {}, [{
"GPU": 8
}], utilizations)
assert to_launch == [("p2.8xlarge", 1)]
def test_get_nodes_to_launch_with_min_workers_and_bin_packing():
provider = MockProvider()
new_types = copy.deepcopy(TYPES_A)
new_types["p2.8xlarge"]["min_workers"] = 2
scheduler = ResourceDemandScheduler(provider, new_types, 10)
provider.create_node({}, {TAG_RAY_USER_NODE_TYPE: "p2.8xlarge"}, 1)
nodes = provider.non_terminated_nodes({})
ips = provider.non_terminated_node_ips({})
# 1 free p2.8xls
utilizations = {ip: {"GPU": 8} for ip in ips}
# 1 more on the way
pending_nodes = {"p2.8xlarge": 1}
# requires 2 p2.8xls (only 2 are in cluster/pending) and 1 p2.xlarge
demands = [{"GPU": 8}] * (len(utilizations) + 1) + [{"GPU": 1}]
to_launch = scheduler.get_nodes_to_launch(nodes, pending_nodes, demands,
utilizations)
assert to_launch == [("p2.xlarge", 1)]
# 3 min_workers of p2.8xlarge covers the 2 p2.8xlarge + 1 p2.xlarge demand.
# 2 p2.8xlarge are running/pending. So we need 1 more p2.8xlarge only to
# meet the min_workers constraint and the demand.
new_types["p2.8xlarge"]["min_workers"] = 3
scheduler = ResourceDemandScheduler(provider, new_types, 10)
to_launch = scheduler.get_nodes_to_launch(nodes, pending_nodes, demands,
utilizations)
# Make sure it does not return [("p2.8xlarge", 1), ("p2.xlarge", 1)]
assert to_launch == [("p2.8xlarge", 1)]
def test_get_nodes_to_launch_limits():
provider = MockProvider()
scheduler = ResourceDemandScheduler(provider, TYPES_A, 3)
@@ -299,6 +411,58 @@ class AutoscalingTest(unittest.TestCase):
autoscaler.update()
self.waitForNodes(2)
def testScaleUpMinWorkers(self):
config = copy.deepcopy(MULTI_WORKER_CLUSTER)
config["min_workers"] = 2
config["max_workers"] = 50
config["idle_timeout_minutes"] = 1
# Since config["min_workers"] > 1, the remaining worker is started
# with the default worker node type.
config["available_node_types"]["p2.8xlarge"]["min_workers"] = 1
config_path = self.write_config(config)
self.provider = MockProvider()
runner = MockProcessRunner()
lm = LoadMetrics()
autoscaler = StandardAutoscaler(
config_path,
lm,
max_failures=0,
process_runner=runner,
update_interval_s=0)
assert len(self.provider.non_terminated_nodes({})) == 0
autoscaler.update()
self.waitForNodes(2)
assert len(self.provider.mock_nodes) == 2
assert {
self.provider.mock_nodes[0].node_type,
self.provider.mock_nodes[1].node_type
} == {"p2.8xlarge", "m4.large"}
self.provider.create_node({}, {
TAG_RAY_USER_NODE_TYPE: "p2.8xlarge",
TAG_RAY_NODE_KIND: NODE_KIND_WORKER
}, 2)
self.provider.create_node({}, {
TAG_RAY_USER_NODE_TYPE: "m4.16xlarge",
TAG_RAY_NODE_KIND: NODE_KIND_WORKER
}, 2)
assert len(self.provider.non_terminated_nodes({})) == 6
# Make sure that after idle_timeout_minutes we don't kill idle
# min workers.
for node_id in self.provider.non_terminated_nodes({}):
lm.last_used_time_by_ip[self.provider.internal_ip(node_id)] = -60
autoscaler.update()
self.waitForNodes(2)
cnt = 0
for id in self.provider.mock_nodes:
if self.provider.mock_nodes[id].state == "running" or \
self.provider.mock_nodes[id].state == "pending":
assert self.provider.mock_nodes[id].node_type in {
"p2.8xlarge", "m4.large"
}
cnt += 1
assert cnt == 2
def testScaleUpIgnoreUsed(self):
config = MULTI_WORKER_CLUSTER.copy()
# Commenting out this line causes the test case to fail?!?!