Automatically detect CPU, GPU, accelerator_type for AWS (#11147)

This commit is contained in:
Ameer Haj Ali
2020-10-02 21:16:43 -07:00
committed by GitHub
parent 6974cea0cd
commit 6b86d4d280
6 changed files with 172 additions and 12 deletions
+77
View File
@@ -5,6 +5,8 @@ import tempfile
import unittest
import urllib
import yaml
import copy
from unittest.mock import MagicMock, Mock, patch
from ray.autoscaler._private.util import prepare_config, validate_config
from ray.test_utils import recursive_fnmatch
@@ -20,6 +22,9 @@ CONFIG_PATHS += recursive_fnmatch(
class AutoscalingConfigTest(unittest.TestCase):
def testValidateDefaultConfig(self):
for config_path in CONFIG_PATHS:
if "aws/example-multi-node-type.yaml" in config_path:
# This is tested in testValidateDefaultConfigAWSMultiNodeTypes.
continue
with open(config_path) as f:
config = yaml.safe_load(f)
config = prepare_config(config)
@@ -28,6 +33,78 @@ class AutoscalingConfigTest(unittest.TestCase):
except Exception:
self.fail("Config did not pass validation test!")
def testValidateDefaultConfigAWSMultiNodeTypes(self):
    """Check that AWS node-type resources are auto-filled from instance data.

    Loads ``autoscaler/aws/example-multi-node-type.yaml``, restricts it to
    three node types, and runs ``prepare_config`` with ``boto3`` replaced by
    a mock whose ``describe_instance_types`` returns canned vCPU/GPU data.
    The prepared config must pass ``validate_config`` and its
    ``available_node_types`` must equal the expected, resource-annotated
    copy built below.
    """
    aws_config_path = os.path.join(
        RAY_PATH, "autoscaler/aws/example-multi-node-type.yaml")
    with open(aws_config_path) as f:
        config = yaml.safe_load(f)
    new_config = copy.deepcopy(config)

    # Keep only the node types this test has mocked instance data for.
    kept_node_types = ("cpu_4_ondemand", "cpu_16_spot", "gpu_8_ondemand")
    new_config["available_node_types"] = {
        name: new_config["available_node_types"][name]
        for name in kept_node_types
    }

    # Build the expectation from an independent copy so that whatever
    # prepare_config does to new_config cannot leak into it.
    orig_new_config = copy.deepcopy(new_config)
    expected_available_node_types = orig_new_config["available_node_types"]
    expected_available_node_types["cpu_4_ondemand"]["resources"] = {
        "CPU": 4
    }
    expected_available_node_types["cpu_16_spot"]["resources"] = {
        "CPU": 16,
        "Custom1": 1,
        "is_spot": 1
    }
    expected_available_node_types["gpu_8_ondemand"]["resources"] = {
        "CPU": 32,
        "GPU": 4,
        "accelerator_type:V100": 1
    }

    # Canned response handed back by the mocked describe_instance_types().
    boto3_dict = {
        "InstanceTypes": [{
            "InstanceType": "m4.xlarge",
            "VCpuInfo": {
                "DefaultVCpus": 4
            }
        }, {
            "InstanceType": "m4.4xlarge",
            "VCpuInfo": {
                "DefaultVCpus": 16
            }
        }, {
            "InstanceType": "p3.8xlarge",
            "VCpuInfo": {
                "DefaultVCpus": 32
            },
            "GpuInfo": {
                "Gpus": [{
                    "Name": "V100",
                    "Count": 4
                }]
            }
        }]
    }

    # boto3.client(...) -> object whose describe_instance_types() returns
    # the canned dictionary above.
    boto3_mock = Mock()
    describe_instance_types_mock = Mock()
    describe_instance_types_mock.describe_instance_types = MagicMock(
        return_value=boto3_dict)
    boto3_mock.client = MagicMock(
        return_value=describe_instance_types_mock)

    with patch.multiple(
            "ray.autoscaler._private.aws.node_provider",
            boto3=boto3_mock,
    ):
        new_config = prepare_config(new_config)
        try:
            validate_config(new_config)
        except Exception:
            self.fail("Config did not pass multi node types auto fill test!")

    # The original compared the two dicts with a bare ``==`` and discarded
    # the result, so the auto-filled resources were never actually checked.
    # Assert outside the broad ``except`` so a mismatch shows a dict diff.
    self.assertEqual(expected_available_node_types,
                     new_config["available_node_types"])
def testValidateNetworkConfig(self):
web_yaml = "https://raw.githubusercontent.com/ray-project/ray/" \
"master/python/ray/autoscaler/aws/example-full.yaml"