mirror of
https://github.com/wassname/ray.git
synced 2026-07-01 20:40:56 +08:00
[autoscaler] Azure versioning (#8168)
This commit is contained in:
@@ -71,11 +71,17 @@
|
||||
{
|
||||
"name": "ray-subnet",
|
||||
"properties": {
|
||||
"addressPrefix": "[parameters('subnet')]"
|
||||
"addressPrefix": "[parameters('subnet')]",
|
||||
"networkSecurityGroup": {
|
||||
"id": "[resourceId('Microsoft.Network/networkSecurityGroups','ray-nsg')]"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"dependsOn": [
|
||||
"[resourceId('Microsoft.Network/networkSecurityGroups', 'ray-nsg')]"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -54,29 +54,32 @@ def _configure_resource_group(config):
|
||||
resource_client.resource_groups.create_or_update(
|
||||
resource_group_name=resource_group, parameters=params)
|
||||
|
||||
# load the template
|
||||
template_path = os.path.join(
|
||||
os.path.dirname(__file__), "azure-config-template.json")
|
||||
with open(template_path, "r") as template_file_fd:
|
||||
template = json.load(template_file_fd)
|
||||
# load the template file
|
||||
current_path = os.path.dirname(os.path.abspath(__file__))
|
||||
template_path = os.path.join(current_path, "azure-config-template.json")
|
||||
with open(template_path, "r") as template_fp:
|
||||
template = json.load(template_fp)
|
||||
|
||||
# choose a random subnet
|
||||
# choose a random subnet, skipping most common value of 0
|
||||
random.seed(resource_group)
|
||||
# start at 1 to avoid most likely collision at 0
|
||||
parameters = {"subnet": "10.{}.0.0/16".format(random.randint(1, 254))}
|
||||
subnet_mask = "10.{}.0.0/16".format(random.randint(1, 254))
|
||||
|
||||
deployment_properties = {
|
||||
"mode": DeploymentMode.incremental,
|
||||
"template": template,
|
||||
"parameters": {k: {
|
||||
"value": v
|
||||
parameters = {
|
||||
"properties": {
|
||||
"mode": DeploymentMode.incremental,
|
||||
"template": template,
|
||||
"parameters": {
|
||||
"subnet": {
|
||||
"value": subnet_mask
|
||||
}
|
||||
}
|
||||
}
|
||||
for k, v in parameters.items()}
|
||||
}
|
||||
|
||||
deployment_async_operation = resource_client.deployments.create_or_update(
|
||||
resource_group, "ray-config", deployment_properties)
|
||||
deployment_async_operation.wait()
|
||||
resource_client.deployments.create_or_update(
|
||||
resource_group_name=resource_group,
|
||||
deployment_name="ray-config",
|
||||
parameters=parameters).wait()
|
||||
|
||||
return config
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ setup_commands:
|
||||
|
||||
# Custom commands that will be run on the head node after common setup.
|
||||
head_setup_commands:
|
||||
- pip install azure-cli-core azure-core azure-mgmt-authorization azure-mgmt-network azure-mgmt-compute azure-mgmt-msi
|
||||
- pip install azure-cli-core==2.4.0 azure-mgmt-compute==12.0.0 azure-mgmt-msi==1.0.0 azure-mgmt-network==10.1.0
|
||||
|
||||
# Custom commands that will be run on worker nodes after common setup.
|
||||
worker_setup_commands: []
|
||||
|
||||
@@ -84,7 +84,7 @@ setup_commands:
|
||||
|
||||
# Custom commands that will be run on the head node after common setup.
|
||||
head_setup_commands:
|
||||
- pip install azure-cli-core azure-core azure-mgmt-authorization azure-mgmt-compute azure-mgmt-msi azure-mgmt-network
|
||||
- pip install azure-cli-core==2.4.0 azure-mgmt-compute==12.0.0 azure-mgmt-msi==1.0.0 azure-mgmt-network==10.1.0
|
||||
|
||||
# Custom commands that will be run on worker nodes after common setup.
|
||||
worker_setup_commands: []
|
||||
|
||||
@@ -116,7 +116,7 @@ setup_commands:
|
||||
|
||||
# Custom commands that will be run on the head node after common setup.
|
||||
head_setup_commands:
|
||||
- pip install azure-cli-core azure-core azure-mgmt-authorization azure-mgmt-network azure-mgmt-compute azure-mgmt-msi
|
||||
- pip install azure-cli-core==2.4.0 azure-mgmt-compute==12.0.0 azure-mgmt-msi==1.0.0 azure-mgmt-network==10.1.0
|
||||
|
||||
# Custom commands that will be run on worker nodes after common setup.
|
||||
worker_setup_commands: []
|
||||
|
||||
@@ -176,11 +176,11 @@ class AzureNodeProvider(NodeProvider):
|
||||
# TODO: restart deallocated nodes if possible
|
||||
resource_group = self.provider_config["resource_group"]
|
||||
|
||||
# load the template
|
||||
template_path = os.path.join(
|
||||
os.path.dirname(__file__), "azure-vm-template.json")
|
||||
with open(template_path, "r") as template_file_fd:
|
||||
template = json.load(template_file_fd)
|
||||
# load the template file
|
||||
current_path = os.path.dirname(os.path.abspath(__file__))
|
||||
template_path = os.path.join(current_path, "azure-vm-template.json")
|
||||
with open(template_path, "r") as template_fp:
|
||||
template = json.load(template_fp)
|
||||
|
||||
# get the tags
|
||||
config_tags = node_config.get("tags", {}).copy()
|
||||
@@ -189,28 +189,33 @@ class AzureNodeProvider(NodeProvider):
|
||||
|
||||
name_tag = config_tags.get(TAG_RAY_NODE_NAME, "node")
|
||||
unique_id = uuid4().hex[:VM_NAME_UUID_LEN]
|
||||
vm_name = "{name}-{id}".format(name=name_tag, id=unique_id)
|
||||
use_internal_ips = self.provider_config.get("use_internal_ips", False)
|
||||
|
||||
parameters = node_config["azure_arm_parameters"].copy()
|
||||
parameters["vmName"] = "{name}-{id}".format(
|
||||
name=name_tag, id=unique_id)
|
||||
parameters["provisionPublicIp"] = not self.provider_config.get(
|
||||
"use_internal_ips", False)
|
||||
parameters["vmTags"] = config_tags
|
||||
parameters["vmCount"] = count
|
||||
template_params = node_config["azure_arm_parameters"].copy()
|
||||
template_params["vmName"] = vm_name
|
||||
template_params["provisionPublicIp"] = not use_internal_ips
|
||||
template_params["vmTags"] = config_tags
|
||||
template_params["vmCount"] = count
|
||||
|
||||
deployment_properties = {
|
||||
"mode": DeploymentMode.incremental,
|
||||
"template": template,
|
||||
"parameters": {k: {
|
||||
"value": v
|
||||
parameters = {
|
||||
"properties": {
|
||||
"mode": DeploymentMode.incremental,
|
||||
"template": template,
|
||||
"parameters": {
|
||||
key: {
|
||||
"value": value
|
||||
}
|
||||
for key, value in template_params.items()
|
||||
}
|
||||
}
|
||||
for k, v in parameters.items()}
|
||||
}
|
||||
|
||||
# TODO: we could get the private/public ips back directly
|
||||
self.resource_client.deployments.create_or_update(
|
||||
resource_group, "ray-vm-{}".format(name_tag),
|
||||
deployment_properties).wait()
|
||||
resource_group_name=resource_group,
|
||||
deployment_name="ray-vm-{}".format(name_tag),
|
||||
parameters=parameters).wait()
|
||||
|
||||
@synchronized
|
||||
def set_node_tags(self, node_id, tags):
|
||||
|
||||
Reference in New Issue
Block a user