mirror of
https://github.com/wassname/ray.git
synced 2026-07-01 11:10:02 +08:00
3c91ff1f63
* Allowing users to provide custom key names & security group inbound rules * linting * getting aws credentials passed in * one more thing * one more thing part 2 * formatting * addressing comments * update * update * update * update * update * update * remove tests * rerun tests Co-authored-by: Allen Yin <allenyin@anyscale.io>
242 lines
9.6 KiB
JSON
242 lines
9.6 KiB
JSON
{
|
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
"$id": "http://github.com/ray-project/ray/python/ray/autoscaler/ray-schema.json",
|
|
"title": "Ray AutoScaler",
|
|
"description": "Ray autoscaler schema",
|
|
"type": "object",
|
|
"definitions": {
|
|
"commands": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string",
|
|
"description": "shell command"
|
|
}
|
|
}
|
|
},
|
|
"required": [
|
|
"cluster_name",
|
|
"provider"
|
|
],
|
|
"additionalProperties": false,
|
|
"properties": {
|
|
"cluster_name": {
|
|
"description": "A unique identifier for the head node and workers of this cluster.",
|
|
"type": "string"
|
|
},
|
|
"min_workers": {
|
|
"description": "The minimum number of worker nodes to launch in addition to the head node. This number should be >= 0",
|
|
"type": "integer",
|
|
"minimum": 0
|
|
},
|
|
"max_workers": {
|
|
"description": "The maximum number of worker nodes to launch in addition to the head node. This takes precedence over min_workers.",
|
|
"type": "integer",
|
|
"minimum": 0
|
|
},
|
|
"initial_workers": {
|
|
"description": "The number of workers to launch initially, in addition to the head node.",
|
|
"type": "integer",
|
|
"minimum": 0
|
|
},
|
|
"autoscaling_mode": {
|
|
"description": "The mode of the autoscaler e.g. default, aggressive",
|
|
"type": "string",
|
|
"enum": [ "default", "aggressive" ]
|
|
},
|
|
"target_utilization_fraction": {
|
|
"description": "The autoscaler will scale up the cluster to this target fraction of resource usage. For example, if a cluster of 8 nodes is 100% busy and target_utilization_fraction was 0.8, it would resize the cluster to 10.",
|
|
"type": "number",
|
|
"minimum": 0,
|
|
"maximum": 1
|
|
},
|
|
"idle_timeout_minutes": {
|
|
"description": "If a node is idle for this many minutes, it will be removed.",
|
|
"type": "integer",
|
|
"minimum": 0
|
|
},
|
|
"provider": {
|
|
"type": "object",
|
|
"description": "Cloud-provider specific configuration.",
|
|
"required": [ "type" ],
|
|
"additionalProperties": true,
|
|
"properties": {
|
|
"type": {
|
|
"type": "string",
|
|
"description": "e.g. aws, azure, gcp,..."
|
|
},
|
|
"region": {
|
|
"type": "string",
|
|
"description": "e.g. us-east-1"
|
|
},
|
|
"module": {
|
|
"type": "string",
|
|
"description": "module, if using external node provider"
|
|
},
|
|
"head_ip": {
|
|
"type": "string",
|
|
"description": "local cluster head node IP, if using a local cluster"
|
|
},
|
|
"worker_ips": {
|
|
"type": "array",
|
|
"description": "local cluster worker node IPs, if using a local cluster"
|
|
},
|
|
"use_internal_ips": {
|
|
"type": "boolean",
|
|
"description": "don't require public ips"
|
|
},
|
|
"namespace": {
|
|
"type": "string",
|
|
"description": "k8s namespace, if using k8s"
|
|
},
|
|
"location": {
|
|
"type": "string",
|
|
"description": "Azure location"
|
|
},
|
|
"resource_group": {
|
|
"type": "string",
|
|
"description": "Azure resource group"
|
|
},
|
|
"tags": {
|
|
"type": "object",
|
|
"description": "Azure user-defined tags"
|
|
},
|
|
"subscription_id": {
|
|
"type": "string",
|
|
"description": "Azure subscription id"
|
|
},
|
|
"msi_identity_id": {
|
|
"type": "string",
|
|
"description": "User-defined managed identity (generated by config)"
|
|
},
|
|
"msi_identity_principal_id": {
|
|
"type": "string",
|
|
"description": "User-defined managed identity principal id (generated by config)"
|
|
},
|
|
"subnet_id": {
|
|
"type": "string",
|
|
"description": "Network subnet id"
|
|
},
|
|
"autoscaler_service_account": {
|
|
"type": "object",
|
|
"description": "k8s autoscaler permissions, if using k8s"
|
|
},
|
|
"autoscaler_role": {
|
|
"type": "object",
|
|
"description": "k8s autoscaler permissions, if using k8s"
|
|
},
|
|
"autoscaler_role_binding": {
|
|
"type": "object",
|
|
"description": "k8s autoscaler permissions, if using k8s"
|
|
},
|
|
"cache_stopped_nodes": {
|
|
"type": "boolean",
|
|
"description": "Whether to try to reuse previously stopped nodes instead of launching nodes. This will also cause the autoscaler to stop nodes instead of terminating them. Only implemented for AWS."
|
|
},
|
|
"availability_zone": {
|
|
"type": "string",
|
|
"description": "GCP availability zone"
|
|
},
|
|
"project_id": {
|
|
"type": ["string", "null"],
|
|
"description": "GCP globally unique project id"
|
|
}
|
|
}
|
|
},
|
|
"auth": {
|
|
"type": "object",
|
|
"description": "How Ray will authenticate with newly launched nodes.",
|
|
"additionalProperties": false,
|
|
"properties": {
|
|
"ssh_user": {
|
|
"type": "string",
|
|
"default": "ubuntu"
|
|
},
|
|
"ssh_public_key": {
|
|
"type": "string"
|
|
},
|
|
"ssh_private_key": {
|
|
"type": "string"
|
|
}
|
|
}
|
|
},
|
|
"docker": {
|
|
"type": "object",
|
|
"description": "Docker configuration. If this is specified, all setup and start commands will be executed in the container.",
|
|
"additionalProperties": false,
|
|
"properties": {
|
|
"image": {
|
|
"type": "string",
|
|
"description": "the docker image name",
|
|
"default": "tensorflow/tensorflow:1.5.0-py3"
|
|
},
|
|
"container_name": {
|
|
"type": "string",
|
|
"default": "ray_docker"
|
|
},
|
|
"pull_before_run": {
|
|
"type": "boolean",
|
|
"description": "run `docker pull` first"
|
|
},
|
|
"run_options": {
|
|
"type": "array",
|
|
"description": "shared options for starting head/worker docker"
|
|
},
|
|
"head_image": {
|
|
"type": "string",
|
|
"description": "image for head node, takes precedence over 'image' if specified"
|
|
},
|
|
"head_run_options": {
|
|
"type": "array",
|
|
"description": "head specific run options, appended to run_options"
|
|
},
|
|
"worker_image": {
|
|
"type": "string",
|
|
"description": "analogous to head_image"
|
|
},
|
|
"worker_run_options": {
|
|
"type": "array",
|
|
"description": "analogous to head_run_options"
|
|
}
|
|
}
|
|
},
|
|
"head_node": {
|
|
"type": "object",
|
|
"description": "Provider-specific config for the head node, e.g. instance type."
|
|
},
|
|
"worker_nodes": {
|
|
"type": "object",
|
|
"description": "Provider-specific config for worker nodes. e.g. instance type."
|
|
},
|
|
"file_mounts": {
|
|
"type": "object",
|
|
"description": "Map of remote paths to local paths, e.g. {\"/tmp/data\": \"/my/local/data\"}"
|
|
},
|
|
"initialization_commands": {
|
|
"$ref": "#/definitions/commands",
|
|
"description": "List of commands that will be run before `setup_commands`. If docker is enabled, these commands will run outside the container and before docker is setup."
|
|
},
|
|
"setup_commands": {
|
|
"$ref": "#/definitions/commands",
|
|
"description": "List of common shell commands to run to setup nodes."
|
|
},
|
|
"head_setup_commands": {
|
|
"$ref": "#/definitions/commands",
|
|
"description": "Commands that will be run on the head node after common setup."
|
|
},
|
|
"worker_setup_commands": {
|
|
"$ref": "#/definitions/commands",
|
|
"description": "Commands that will be run on worker nodes after common setup."
|
|
},
|
|
"head_start_ray_commands": {
|
|
"$ref": "#/definitions/commands",
|
|
"description": "Command to start ray on the head node. You shouldn't need to modify this."
|
|
},
|
|
"worker_start_ray_commands": {
|
|
"$ref": "#/definitions/commands",
|
|
"description": "Command to start ray on worker nodes. You shouldn't need to modify this."
|
|
},
|
|
"no_restart": {
|
|
"description": "Whether to avoid restarting the cluster during updates. This field is controlled by the ray --no-restart flag and cannot be set by the user."
|
|
}
|
|
}
|
|
} |