mirror of
https://github.com/wassname/ray.git
synced 2026-07-03 11:10:25 +08:00
[autoscaler] Fixing AWS instance types autofill (#11758)
This commit is contained in:
@@ -4,7 +4,7 @@ import threading
|
||||
from collections import defaultdict
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import boto3
|
||||
import botocore
|
||||
@@ -52,6 +52,35 @@ def make_ec2_client(region, max_retries, aws_credentials=None):
|
||||
"ec2", region_name=region, config=config, **aws_credentials)
|
||||
|
||||
|
||||
def list_ec2_instances(region: str) -> List[Dict[str, Any]]:
    """Get all instance-types/resources available in the user's AWS region.

    The ``describe_instance_types`` call returns its results in pages. This
    function keeps requesting the next page for as long as the response
    carries a ``NextToken`` key and concatenates the ``InstanceTypes``
    entries of every page into a single list.

    Args:
        region (str): the region of the AWS provider. e.g., "us-west-2".

    Returns:
        final_instance_types: a list of instance-type descriptions. An
            example of one element in the list:
            {'InstanceType': 'm5a.xlarge', 'ProcessorInfo':
            {'SupportedArchitectures': ['x86_64'], 'SustainedClockSpeedInGhz':
            2.5},'VCpuInfo': {'DefaultVCpus': 4, 'DefaultCores': 2,
            'DefaultThreadsPerCore': 2, 'ValidCores': [2],
            'ValidThreadsPerCore': [1, 2]}, 'MemoryInfo': {'SizeInMiB': 16384},
            ...}

    """
    # Build the client once and reuse it for every page instead of
    # constructing a fresh client per request.
    ec2 = boto3.client("ec2", region_name=region)

    final_instance_types = []
    request_kwargs = {}
    while True:
        page = ec2.describe_instance_types(**request_kwargs)
        # Each response is a new dict produced by its own call, so its
        # entries can be collected directly without copying them first.
        final_instance_types.extend(page["InstanceTypes"])
        if "NextToken" not in page:
            break
        # Ask for the page that follows the one just received.
        request_kwargs = {"NextToken": page["NextToken"]}

    return final_instance_types
|
||||
|
||||
|
||||
class AWSNodeProvider(NodeProvider):
|
||||
def __init__(self, provider_config, cluster_name):
|
||||
NodeProvider.__init__(self, provider_config, cluster_name)
|
||||
@@ -456,8 +485,8 @@ class AWSNodeProvider(NodeProvider):
|
||||
return cluster_config
|
||||
cluster_config = copy.deepcopy(cluster_config)
|
||||
|
||||
instances_list = boto3.client("ec2").describe_instance_types()[
|
||||
"InstanceTypes"]
|
||||
instances_list = list_ec2_instances(
|
||||
cluster_config["provider"]["region"])
|
||||
instances_dict = {
|
||||
instance["InstanceType"]: instance
|
||||
for instance in instances_list
|
||||
@@ -488,5 +517,8 @@ class AWSNodeProvider(NodeProvider):
|
||||
"resources"] = autodetected_resources
|
||||
cli_logger.print("Updating the resources of {} to {}.",
|
||||
node_type, autodetected_resources)
|
||||
|
||||
else:
|
||||
raise ValueError("Instance type " + instance_type +
|
||||
" is not available in AWS region: " +
|
||||
cluster_config["provider"]["region"] + ".")
|
||||
return cluster_config
|
||||
|
||||
@@ -105,10 +105,8 @@ def fillout_defaults(config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
try:
|
||||
defaults = _fillout_available_node_types_resources(defaults)
|
||||
except Exception:
|
||||
# We don't want to introduce new errors with filling available node
|
||||
# types resources feature.
|
||||
logger.exception("Failed to autodetect node resources")
|
||||
|
||||
logger.exception("Failed to autodetect node resources.")
|
||||
raise
|
||||
return defaults
|
||||
|
||||
|
||||
|
||||
@@ -23,8 +23,10 @@ CONFIG_PATHS += recursive_fnmatch(
|
||||
class AutoscalingConfigTest(unittest.TestCase):
|
||||
def testValidateDefaultConfig(self):
|
||||
for config_path in CONFIG_PATHS:
|
||||
if "aws/example-multi-node-type.yaml" in config_path:
|
||||
# This is tested in testValidateDefaultConfigAWSMultiNodeTypes.
|
||||
if ("aws/example-multi-node-type.yaml" in config_path
|
||||
or "staroid/example-multi-node-type.yaml" in config_path):
|
||||
# aws is tested in testValidateDefaultConfigAWSMultiNodeTypes.
|
||||
# staroid fails as it requires an installation of staroid.
|
||||
continue
|
||||
with open(config_path) as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
Reference in New Issue
Block a user